import time
from collections.abc import Generator, Mapping
from typing import TYPE_CHECKING, Any, Optional, Union

from core.app.app_config.entities import ExternalDataVariableEntity, PromptTemplateEntity
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.entities.app_invoke_entities import (
    AppGenerateEntity,
    EasyUIBasedAppGenerateEntity,
    InvokeFrom,
    ModelConfigWithCredentialsEntity,
)
from core.app.entities.queue_entities import QueueAgentMessageEvent, QueueLLMChunkEvent, QueueMessageEndEvent
from core.app.features.annotation_reply.annotation_reply import AnnotationReplyFeature
from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature
from core.external_data_tool.external_data_fetch import ExternalDataFetch
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.moderation.input_moderation import InputModeration
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
from core.prompt.simple_prompt_transform import ModelMode, SimplePromptTransform
from models.model import App, AppMode, Message, MessageAnnotation

if TYPE_CHECKING:
    from core.file.models import File


def _is_max_tokens_rule(parameter_rule: Any) -> bool:
    """Tell whether a parameter rule is named, or templated from, ``max_tokens``."""
    return parameter_rule.name == "max_tokens" or parameter_rule.use_template == "max_tokens"


def _configured_max_tokens(model_config: ModelConfigWithCredentialsEntity) -> Any:
    """
    Read the configured ``max_tokens`` value from the model parameters.

    Every parameter rule is inspected and the last matching rule wins; the value
    is looked up under the rule name first, then under its template name.

    :param model_config: model config entity
    :return: the configured value, or 0 when no rule matches or the value is falsy
    """
    max_tokens = 0
    for parameter_rule in model_config.model_schema.parameter_rules:
        if _is_max_tokens_rule(parameter_rule):
            max_tokens = (
                model_config.parameters.get(parameter_rule.name)
                or model_config.parameters.get(parameter_rule.use_template)
                or 0
            )
    return max_tokens


class AppRunner:
    """
    Helper routines for running an app: token budgeting, prompt organization,
    publishing model output to the queue, moderation, external data and
    annotation lookups.
    """

    def get_pre_calculate_rest_tokens(
        self,
        app_record: App,
        model_config: ModelConfigWithCredentialsEntity,
        prompt_template_entity: PromptTemplateEntity,
        inputs: dict[str, str],
        files: list["File"],
        query: Optional[str] = None,
    ) -> int:
        """
        Get pre calculate rest tokens

        The prompt is organized without memory and context, and its token count
        plus the configured max_tokens is subtracted from the model context size.

        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :return: remaining tokens, or -1 when the model schema has no context size
        :raises InvokeBadRequestError: when the remaining token count is negative
        """
        model_instance = ModelInstance(
            provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
        )

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
        max_tokens = _configured_max_tokens(model_config)

        if model_context_tokens is None:
            return -1

        # get prompt messages without memory and context
        prompt_messages, _stop = self.organize_prompt_messages(
            app_record=app_record,
            model_config=model_config,
            prompt_template_entity=prompt_template_entity,
            inputs=inputs,
            files=files,
            query=query,
        )

        prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)

        rest_tokens = model_context_tokens - max_tokens - prompt_tokens
        if rest_tokens < 0:
            raise InvokeBadRequestError(
                "Query or prefix prompt is too long, you can reduce the prefix prompt, "
                "or shrink the max token, or switch to a llm with a larger token limit size."
            )

        return rest_tokens

    def recalc_llm_max_tokens(
        self, model_config: ModelConfigWithCredentialsEntity, prompt_messages: list[PromptMessage]
    ):
        """
        Recalculate max_tokens when prompt tokens + max_tokens exceed the model context size

        When the sum is over the limit, every max_tokens parameter in
        ``model_config.parameters`` is overwritten in place with the space left
        after the prompt, but never less than 16.

        :param model_config: model config entity (its parameters may be mutated)
        :param prompt_messages: prompt messages
        :return: -1 when the model schema has no context size, otherwise None
        """
        model_instance = ModelInstance(
            provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
        )

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
        max_tokens = _configured_max_tokens(model_config)

        if model_context_tokens is None:
            return -1

        prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)

        if prompt_tokens + max_tokens > model_context_tokens:
            max_tokens = max(model_context_tokens - prompt_tokens, 16)

            for parameter_rule in model_config.model_schema.parameter_rules:
                if _is_max_tokens_rule(parameter_rule):
                    model_config.parameters[parameter_rule.name] = max_tokens

    def organize_prompt_messages(
        self,
        app_record: App,
        model_config: ModelConfigWithCredentialsEntity,
        prompt_template_entity: PromptTemplateEntity,
        inputs: dict[str, str],
        files: list["File"],
        query: Optional[str] = None,
        context: Optional[str] = None,
        memory: Optional[TokenBufferMemory] = None,
    ) -> tuple[list[PromptMessage], Optional[list[str]]]:
        """
        Organize prompt messages

        :param context: context
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query (None is passed on as an empty string)
        :param memory: memory
        :return: prompt messages and stop words
        """
        if prompt_template_entity.prompt_type == PromptTemplateEntity.PromptType.SIMPLE:
            # the simple transform returns the stop words together with the messages
            prompt_messages, stop = SimplePromptTransform().get_prompt(
                app_mode=AppMode.value_of(app_record.mode),
                prompt_template_entity=prompt_template_entity,
                inputs=inputs,
                query=query or "",
                files=files,
                context=context,
                memory=memory,
                model_config=model_config,
            )
            return prompt_messages, stop

        memory_config = MemoryConfig(window=MemoryConfig.WindowConfig(enabled=False))

        prompt_template: Union[CompletionModelPromptTemplate, list[ChatModelMessage]]
        if ModelMode.value_of(model_config.mode) == ModelMode.COMPLETION:
            completion_template = prompt_template_entity.advanced_completion_prompt_template
            prompt_template = CompletionModelPromptTemplate(text=completion_template.prompt)

            if completion_template.role_prefix:
                memory_config.role_prefix = MemoryConfig.RolePrefix(
                    user=completion_template.role_prefix.user,
                    assistant=completion_template.role_prefix.assistant,
                )
        else:
            prompt_template = [
                ChatModelMessage(text=message.text, role=message.role)
                for message in prompt_template_entity.advanced_chat_prompt_template.messages
            ]

        prompt_messages = AdvancedPromptTransform().get_prompt(
            prompt_template=prompt_template,
            inputs=inputs,
            query=query or "",
            files=files,
            context=context,
            memory_config=memory_config,
            memory=memory,
            model_config=model_config,
        )
        # the advanced transform returns messages only; stop words come from the model config
        return prompt_messages, model_config.stop

    def direct_output(
        self,
        queue_manager: AppQueueManager,
        app_generate_entity: EasyUIBasedAppGenerateEntity,
        prompt_messages: list,
        text: str,
        stream: bool,
        usage: Optional[LLMUsage] = None,
    ) -> None:
        """
        Direct output

        Publishes a fixed text as if it were a model answer. When streaming, one
        chunk event is published per character, followed by a short sleep; a
        message end event carrying the full text is always published last.

        :param queue_manager: application queue manager
        :param app_generate_entity: app generate entity
        :param prompt_messages: prompt messages
        :param text: text
        :param stream: stream
        :param usage: usage (an empty usage is reported when omitted)
        :return:
        """
        model_name = app_generate_entity.model_conf.model

        if stream:
            for index, token in enumerate(text):
                chunk = LLMResultChunk(
                    model=model_name,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(index=index, message=AssistantPromptMessage(content=token)),
                )

                queue_manager.publish(QueueLLMChunkEvent(chunk=chunk), PublishFrom.APPLICATION_MANAGER)
                # pace the chunks so the text does not arrive all at once
                time.sleep(0.01)

        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=LLMResult(
                    model=model_name,
                    prompt_messages=prompt_messages,
                    message=AssistantPromptMessage(content=text),
                    usage=usage or LLMUsage.empty_usage(),
                ),
            ),
            PublishFrom.APPLICATION_MANAGER,
        )

    def _handle_invoke_result(
        self,
        invoke_result: Union[LLMResult, Generator],
        queue_manager: AppQueueManager,
        stream: bool,
        agent: bool = False,
    ) -> None:
        """
        Handle invoke result

        Dispatches to the streaming handler when ``stream`` is true, otherwise
        to the direct handler.

        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param stream: stream
        :param agent: agent
        :return:
        """
        if stream:
            self._handle_invoke_result_stream(invoke_result=invoke_result, queue_manager=queue_manager, agent=agent)
        else:
            self._handle_invoke_result_direct(invoke_result=invoke_result, queue_manager=queue_manager, agent=agent)

    def _handle_invoke_result_direct(
        self, invoke_result: LLMResult, queue_manager: AppQueueManager, agent: bool
    ) -> None:
        """
        Handle invoke result direct

        Publishes the complete result as a single message end event.

        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: agent (not used by this handler)
        :return:
        """
        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=invoke_result,
            ),
            PublishFrom.APPLICATION_MANAGER,
        )

    def _handle_invoke_result_stream(
        self, invoke_result: Generator, queue_manager: AppQueueManager, agent: bool
    ) -> None:
        """
        Handle invoke result stream

        Re-publishes every chunk (as an agent message event when ``agent`` is
        true, otherwise as an LLM chunk event), then publishes a message end
        event with the concatenated text, the first truthy model and prompt
        messages seen, and the last truthy usage seen.

        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: agent
        :return:
        """
        model = None
        prompt_messages: list = []
        text_parts: list[str] = []
        usage = None

        for result in invoke_result:
            if agent:
                queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)
            else:
                queue_manager.publish(QueueLLMChunkEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)

            # Only string content is accumulated: concatenating a non-string
            # (e.g. None) onto the text would raise TypeError mid-stream.
            # NOTE(review): non-string content is left out of the final text;
            # confirm whether other content shapes need to be rendered here.
            content = result.delta.message.content
            if isinstance(content, str):
                text_parts.append(content)

            if not model:
                model = result.model

            if not prompt_messages:
                prompt_messages = result.prompt_messages

            if result.delta.usage:
                usage = result.delta.usage

        if not usage:
            usage = LLMUsage.empty_usage()

        llm_result = LLMResult(
            model=model,
            prompt_messages=prompt_messages,
            message=AssistantPromptMessage(content="".join(text_parts)),
            usage=usage,
        )

        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=llm_result,
            ),
            PublishFrom.APPLICATION_MANAGER,
        )

    def moderation_for_inputs(
        self,
        app_id: str,
        tenant_id: str,
        app_generate_entity: AppGenerateEntity,
        inputs: Mapping[str, Any],
        query: str,
        message_id: str,
    ) -> tuple[bool, dict, str]:
        """
        Process sensitive_word_avoidance.

        :param app_id: app id
        :param tenant_id: tenant id
        :param app_generate_entity: app generate entity
        :param inputs: inputs
        :param query: query (a falsy query is passed on as an empty string)
        :param message_id: message id
        :return: the result of ``InputModeration.check``
        """
        moderation_feature = InputModeration()
        return moderation_feature.check(
            app_id=app_id,
            tenant_id=tenant_id,
            app_config=app_generate_entity.app_config,
            inputs=inputs,
            query=query or "",
            message_id=message_id,
            trace_manager=app_generate_entity.trace_manager,
        )

    def check_hosting_moderation(
        self,
        application_generate_entity: EasyUIBasedAppGenerateEntity,
        queue_manager: AppQueueManager,
        prompt_messages: list[PromptMessage],
    ) -> bool:
        """
        Check hosting moderation

        When the check result is truthy, a fixed reply is published through
        ``direct_output`` in place of a model answer.

        :param application_generate_entity: application generate entity
        :param queue_manager: queue manager
        :param prompt_messages: prompt messages
        :return: the result of ``HostingModerationFeature.check``
        """
        hosting_moderation_feature = HostingModerationFeature()
        moderation_result = hosting_moderation_feature.check(
            application_generate_entity=application_generate_entity, prompt_messages=prompt_messages
        )

        if moderation_result:
            self.direct_output(
                queue_manager=queue_manager,
                app_generate_entity=application_generate_entity,
                prompt_messages=prompt_messages,
                text="I apologize for any confusion, but I'm an AI assistant to be helpful, harmless, and honest.",
                stream=application_generate_entity.stream,
            )

        return moderation_result

    def fill_in_inputs_from_external_data_tools(
        self,
        tenant_id: str,
        app_id: str,
        external_data_tools: list[ExternalDataVariableEntity],
        inputs: dict,
        query: str,
    ) -> dict:
        """
        Fill in variable inputs from external data tools if exists.

        :param tenant_id: workspace id
        :param app_id: app id
        :param external_data_tools: external data tools configs
        :param inputs: the inputs
        :param query: the query
        :return: the filled inputs
        """
        external_data_fetch_feature = ExternalDataFetch()
        return external_data_fetch_feature.fetch(
            tenant_id=tenant_id, app_id=app_id, external_data_tools=external_data_tools, inputs=inputs, query=query
        )

    def query_app_annotations_to_reply(
        self, app_record: App, message: Message, query: str, user_id: str, invoke_from: InvokeFrom
    ) -> Optional[MessageAnnotation]:
        """
        Query app annotations to reply

        :param app_record: app record
        :param message: message
        :param query: query
        :param user_id: user id
        :param invoke_from: invoke from
        :return: the result of ``AnnotationReplyFeature.query``
        """
        annotation_reply_feature = AnnotationReplyFeature()
        return annotation_reply_feature.query(
            app_record=app_record, message=message, query=query, user_id=user_id, invoke_from=invoke_from
        )