Source code for aisert.validators.token_validator.token_validator_base

import logging



[docs]
class TokenValidatorBase:
    """
    Base class for token counting implementations across different AI providers.

    Provides a common interface for token counting while allowing provider-specific
    implementations (OpenAI, Anthropic, HuggingFace, Google). Each provider has
    different tokenization methods and APIs.

    The class does not use ``abc``; instead, every hook below raises
    ``NotImplementedError`` in this base class until a subclass overrides it.

    Hooks intended to be overridden by subclasses:
    - get_instance(): Factory classmethod (typically returning a cached instance)
    - count(): Token counting logic for the specific provider
    - _get_encoding(): Provider-specific tokenizer/encoder lookup

    Example:
        class MyTokenValidator(TokenValidatorBase):
            def count(self, text: str) -> int:
                return len(text.split())  # Simple word count
    """


[docs]
    def __init__(self):
        """
        Initialize base token validator.
        """
        super().__init__()
        self.logger = logging.getLogger(self.__class__.__name__)


    def _get_encoding(self):
        """
        Get the encoding client for tokenization (provider-specific).
        
        Must be implemented by subclasses to return the appropriate
        tokenizer/encoder for their specific provider.
        
        Returns:
            Provider-specific encoding client
        
        Raises:
            NotImplementedError: If subclass doesn't implement this method
        """
        raise NotImplementedError("Subclasses must implement the _get_encoding method.")


[docs]
    @classmethod
    def get_instance(cls, **kwargs):
        """
        Factory method to get validator instance (typically singleton).
        
        Should be implemented by subclasses to return cached instances
        for performance, as tokenizers can be expensive to initialize.
        
        Args:
            **kwargs: Provider-specific configuration parameters
        
        Returns:
            Instance of the token validator
        
        Raises:
            NotImplementedError: If subclass doesn't implement this method
        """
        raise NotImplementedError("Subclasses must implement the get_instance method.")



[docs]
    def count(self, text: str) -> int:
        """
        Count tokens in the provided text using provider-specific logic.
        
        Args:
            text: Input text to count tokens for
        
        Returns:
            Number of tokens in the text
        
        Raises:
            NotImplementedError: If subclass doesn't implement this method
        
        Example:
            validator = OpenAITokenValidator.get_instance(token_model="gpt-4")
            count = validator.count("Hello world")  # Returns token count
        """
        raise NotImplementedError("Subclasses must implement the count method.")
Aisert

Navigation

Related Topics

Source code for aisert.validators.token_validator.token_validator_base