Source code for aisert.validators.token_validator.token_validator_base
import logging
[docs]
class TokenValidatorBase:
    """
    Base class for token counting implementations across different AI providers.

    Provides a common interface for token counting while allowing
    provider-specific implementations (OpenAI, Anthropic, HuggingFace,
    Google). Each provider has different tokenization methods and APIs.

    Every method other than ``__init__`` raises ``NotImplementedError`` in
    this base class. Subclasses are expected to override:
        - get_instance(): Factory method for singleton pattern
        - count(): Token counting logic for the specific provider
        - _get_encoding(): Provider-specific tokenizer/encoder lookup

    Example:
        class MyTokenValidator(TokenValidatorBase):
            @classmethod
            def get_instance(cls, **kwargs):
                return cls()

            def count(self, text: str) -> int:
                return len(text.split())  # Simple word count
    """

    def __init__(self) -> None:
        """
        Initialize base token validator.

        Creates ``self.logger``, a logger named after the concrete class, so
        log records from each subclass are attributed to that subclass.
        """
        # Cooperative call keeps the class usable in multiple-inheritance chains.
        super().__init__()
        self.logger = logging.getLogger(self.__class__.__name__)

    def _get_encoding(self):
        """
        Get the encoding client for tokenization (provider-specific).

        Must be implemented by subclasses to return the appropriate
        tokenizer/encoder for their specific provider.

        Returns:
            Provider-specific encoding client

        Raises:
            NotImplementedError: If subclass doesn't implement this method
        """
        raise NotImplementedError("Subclasses must implement the _get_encoding method.")

    @classmethod
    def get_instance(cls, **kwargs) -> "TokenValidatorBase":
        """
        Factory method to get validator instance (typically singleton).

        Should be implemented by subclasses to return cached instances
        for performance, as tokenizers can be expensive to initialize.

        Args:
            **kwargs: Provider-specific configuration parameters

        Returns:
            Instance of the token validator

        Raises:
            NotImplementedError: If subclass doesn't implement this method
        """
        raise NotImplementedError("Subclasses must implement the get_instance method.")

    def count(self, text: str) -> int:
        """
        Count tokens in the provided text using provider-specific logic.

        Args:
            text: Input text to count tokens for

        Returns:
            Number of tokens in the text

        Raises:
            NotImplementedError: If subclass doesn't implement this method

        Example:
            validator = OpenAITokenValidator.get_instance(token_model="gpt-4")
            count = validator.count("Hello world")  # Returns token count
        """
        raise NotImplementedError("Subclasses must implement the count method.")