12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- from core.model_runtime.model_providers.huggingface_tei.tei_helper import TeiModelExtraParameter
class MockTEIClass:
    """Canned replacements for TEI helper calls used while mocking.

    Every method accepts ``server_url`` only to keep the call signature;
    none of them reads it or performs any network access.
    """

    @staticmethod
    def get_tei_extra_parameter(server_url: str, model_name: str) -> TeiModelExtraParameter:
        """Return dummy extra parameters for ``model_name``.

        The model type is ``'reranker'`` when ``model_name`` contains
        ``'rerank'`` and ``'embedding'`` otherwise; the limits are fixed.
        """
        # During mock, we don't have a real server to query, so we just return a dummy value
        model_type = 'reranker' if 'rerank' in model_name else 'embedding'
        return TeiModelExtraParameter(model_type=model_type, max_input_length=512, max_client_batch_size=1)

    @staticmethod
    def invoke_tokenize(server_url: str, texts: list[str]) -> list[list[dict]]:
        """Tokenize each text by splitting on single spaces.

        Returns one list per input text. Each token dict carries ``id``
        (position within its text), ``text``, ``special`` (always ``False``)
        and the ``start``/``stop`` character offsets of the token.
        """
        tokenized_texts = []
        for text in texts:
            offset = 0
            tokenized_text = []
            for idx, token in enumerate(text.split(' ')):
                stop = offset + len(token)
                tokenized_text.append(
                    {
                        'id': idx,
                        'text': token,
                        'special': False,
                        'start': offset,
                        'stop': stop,
                    }
                )
                # Skip over the single space that separated this token from the next.
                offset = stop + 1
            tokenized_texts.append(tokenized_text)
        return tokenized_texts

    @staticmethod
    def invoke_embeddings(server_url: str, texts: list[str]) -> dict:
        """Return a fixed embedding payload with one entry per input text.

        Response shape:

            {
                "object": "list",
                "data": [{"object": "embedding", "embedding": [...], "index": 0}],
                "model": "MODEL_NAME",
                "usage": {"prompt_tokens": 3, "total_tokens": 3}
            }

        Every embedding is a separate list of 768 values of ``0.1``; both
        usage counters are the number of space-separated tokens over all texts.
        """
        embeddings = [
            {
                'object': 'embedding',
                # Built per entry so the returned vectors never alias each other.
                'embedding': [0.1] * 768,
                'index': idx,
            }
            for idx in range(len(texts))
        ]
        token_count = sum(len(text.split(' ')) for text in texts)
        return {
            'object': 'list',
            'data': embeddings,
            'model': 'MODEL_NAME',
            'usage': {
                'prompt_tokens': token_count,
                'total_tokens': token_count,
            },
        }

    @staticmethod
    def invoke_rerank(server_url: str, query: str, texts: list[str]) -> list[dict]:
        """Return a fixed rerank result for the first text only.

        Example response:

            [{"index": 0, "text": "Deep Learning is ...", "score": 0.9950755}]

        ``query`` is ignored. An empty ``texts`` yields an empty list;
        otherwise the result holds a single entry with score ``0.9``.
        """
        # For mock, only return the first document
        return [
            {
                'index': idx,
                'text': text,
                'score': 0.9,
            }
            for idx, text in enumerate(texts[:1])
        ]
|