"""
For some reason, model will be used in tools like WebScraperTool, WikipediaSearchTool etc.
Therefore, a model manager is needed to list/invoke/validate models.
"""

import json
from typing import cast

from core.model_manager import ModelManager
from core.model_runtime.entities.llm_entities import LLMResult
from core.model_runtime.entities.message_entities import PromptMessage
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel, ModelPropertyKey
from core.model_runtime.utils.encoders import jsonable_encoder
from core.tools.model.errors import InvokeModelError
from extensions.ext_database import db
from models.tools import ToolModelInvoke

# Fallback context window used when a model schema does not declare CONTEXT_SIZE.
_DEFAULT_CONTEXT_SIZE = 2048


class ToolModelManager:
    @staticmethod
    def get_max_llm_context_tokens(
        tenant_id: str,
    ) -> int:
        """
        Get the max context size (in tokens) of the tenant's default LLM.

        :param tenant_id: tenant id
        :return: context size from the model schema, or 2048 if the schema
            does not declare one
        :raises InvokeModelError: if no default model or model schema is found
        """
        model_manager = ModelManager()
        model_instance = model_manager.get_default_model_instance(
            tenant_id=tenant_id, model_type=ModelType.LLM,
        )

        if not model_instance:
            raise InvokeModelError('Model not found')

        llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
        schema = llm_model.get_model_schema(model_instance.model, model_instance.credentials)

        if not schema:
            raise InvokeModelError('No model schema found')

        max_tokens = schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE, None)
        if max_tokens is None:
            return _DEFAULT_CONTEXT_SIZE

        return max_tokens

    @staticmethod
    def calculate_tokens(
        tenant_id: str,
        prompt_messages: list[PromptMessage]
    ) -> int:
        """
        Calculate the token count of prompt messages with the default LLM's tokenizer.

        :param tenant_id: tenant id
        :param prompt_messages: prompt messages to count tokens for
        :return: number of prompt tokens
        :raises InvokeModelError: if no default model is found
        """
        # get model instance
        model_manager = ModelManager()
        model_instance = model_manager.get_default_model_instance(
            tenant_id=tenant_id, model_type=ModelType.LLM
        )

        if not model_instance:
            raise InvokeModelError('Model not found')

        llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)

        # get tokens
        tokens = llm_model.get_num_tokens(model_instance.model, model_instance.credentials, prompt_messages)

        return tokens

    @staticmethod
    def invoke(
        user_id: str, tenant_id: str,
        tool_type: str, tool_name: str,
        prompt_messages: list[PromptMessage]
    ) -> LLMResult:
        """
        invoke model with parameters in user's own context

        :param user_id: user id
        :param tenant_id: tenant id, the tenant id of the creator of the tool
        :param tool_type: tool type
        :param tool_name: tool name
        :param prompt_messages: prompt messages
        :return: the LLM result
        :raises InvokeModelError: wraps any model-runtime invoke error
        """
        # get model manager
        model_manager = ModelManager()
        # get model instance
        model_instance = model_manager.get_default_model_instance(
            tenant_id=tenant_id, model_type=ModelType.LLM,
        )

        llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)

        # get model credentials
        model_credentials = model_instance.credentials

        # get prompt tokens
        prompt_tokens = llm_model.get_num_tokens(model_instance.model, model_credentials, prompt_messages)

        # fixed sampling parameters for tool-side model calls
        model_parameters = {
            'temperature': 0.8,
            'top_p': 0.8,
        }

        # create tool model invoke record before calling the model, so the
        # invocation is persisted even if the call below fails
        tool_model_invoke = ToolModelInvoke(
            user_id=user_id,
            tenant_id=tenant_id,
            provider=model_instance.provider,
            tool_type=tool_type,
            tool_name=tool_name,
            model_parameters=json.dumps(model_parameters),
            prompt_messages=json.dumps(jsonable_encoder(prompt_messages)),
            model_response='',
            prompt_tokens=prompt_tokens,
            answer_tokens=0,
            answer_unit_price=0,
            answer_price_unit=0,
            provider_response_latency=0,
            total_price=0,
            currency='USD',
        )

        db.session.add(tool_model_invoke)
        db.session.commit()

        try:
            response: LLMResult = llm_model.invoke(
                model=model_instance.model,
                credentials=model_credentials,
                prompt_messages=prompt_messages,
                model_parameters=model_parameters,
                tools=[], stop=[], stream=False, user=user_id, callbacks=[]
            )
        except InvokeRateLimitError as e:
            raise InvokeModelError(f'Invoke rate limit error: {e}') from e
        except InvokeBadRequestError as e:
            raise InvokeModelError(f'Invoke bad request error: {e}') from e
        except InvokeConnectionError as e:
            raise InvokeModelError(f'Invoke connection error: {e}') from e
        except InvokeAuthorizationError as e:
            # deliberately omit details: the underlying message may leak credentials
            raise InvokeModelError('Invoke authorization error') from e
        except InvokeServerUnavailableError as e:
            raise InvokeModelError(f'Invoke server unavailable error: {e}') from e
        except Exception as e:
            raise InvokeModelError(f'Invoke error: {e}') from e

        # update tool model invoke with the actual response and usage figures
        tool_model_invoke.model_response = response.message.content
        if response.usage:
            tool_model_invoke.answer_tokens = response.usage.completion_tokens
            tool_model_invoke.answer_unit_price = response.usage.completion_unit_price
            tool_model_invoke.answer_price_unit = response.usage.completion_price_unit
            tool_model_invoke.provider_response_latency = response.usage.latency
            tool_model_invoke.total_price = response.usage.total_price
            tool_model_invoke.currency = response.usage.currency

        db.session.commit()

        return response