base_app_runner.py

import time
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Optional, Union

from core.app.app_config.entities import ExternalDataVariableEntity, PromptTemplateEntity
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.entities.app_invoke_entities import (
    AppGenerateEntity,
    EasyUIBasedAppGenerateEntity,
    InvokeFrom,
    ModelConfigWithCredentialsEntity,
)
from core.app.entities.queue_entities import QueueAgentMessageEvent, QueueLLMChunkEvent, QueueMessageEndEvent
from core.app.features.annotation_reply.annotation_reply import AnnotationReplyFeature
from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature
from core.external_data_tool.external_data_fetch import ExternalDataFetch
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.moderation.input_moderation import InputModeration
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
from core.prompt.simple_prompt_transform import ModelMode, SimplePromptTransform
from models.model import App, AppMode, Message, MessageAnnotation

if TYPE_CHECKING:
    from core.file.models import File


class AppRunner:
    def get_pre_calculate_rest_tokens(
        self,
        app_record: App,
        model_config: ModelConfigWithCredentialsEntity,
        prompt_template_entity: PromptTemplateEntity,
        inputs: Mapping[str, str],
        files: Sequence["File"],
        query: Optional[str] = None,
    ) -> int:
        """
        Pre-calculate the number of tokens remaining for the model's response.
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :return: remaining token budget, or -1 if the model declares no context size
        """
        # Get the model instance (used here only for token counting)
        model_instance = ModelInstance(
            provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
        )

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if parameter_rule.name == "max_tokens" or (
                parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
            ):
                max_tokens = (
                    model_config.parameters.get(parameter_rule.name)
                    or model_config.parameters.get(parameter_rule.use_template or "")
                ) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        # get prompt messages without memory and context
        prompt_messages, stop = self.organize_prompt_messages(
            app_record=app_record,
            model_config=model_config,
            prompt_template_entity=prompt_template_entity,
            inputs=inputs,
            files=files,
            query=query,
        )

        prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)

        rest_tokens: int = model_context_tokens - max_tokens - prompt_tokens
        if rest_tokens < 0:
            raise InvokeBadRequestError(
                "Query or prefix prompt is too long. You can reduce the prefix prompt, "
                "shrink max_tokens, or switch to an LLM with a larger context window."
            )

        return rest_tokens
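
    # Illustrative arithmetic (hypothetical numbers): for a model with an
    # 8192-token context window, max_tokens=512, and a 1000-token prompt,
    # the remaining budget is 8192 - 512 - 1000 = 6680 tokens.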

    def recalc_llm_max_tokens(
        self, model_config: ModelConfigWithCredentialsEntity, prompt_messages: list[PromptMessage]
    ):
        # Recalculate max_tokens if prompt_tokens + max_tokens exceeds the model's token limit
        model_instance = ModelInstance(
            provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
        )

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if parameter_rule.name == "max_tokens" or (
                parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
            ):
                max_tokens = (
                    model_config.parameters.get(parameter_rule.name)
                    or model_config.parameters.get(parameter_rule.use_template or "")
                ) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)

        if prompt_tokens + max_tokens > model_context_tokens:
            max_tokens = max(model_context_tokens - prompt_tokens, 16)

            for parameter_rule in model_config.model_schema.parameter_rules:
                if parameter_rule.name == "max_tokens" or (
                    parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
                ):
                    model_config.parameters[parameter_rule.name] = max_tokens
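
    # Illustrative clamp (hypothetical numbers): with a 4096-token context,
    # a 4000-token prompt, and max_tokens=512, the sum 4512 exceeds 4096,
    # so max_tokens is lowered to max(4096 - 4000, 16) = 96.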

    def organize_prompt_messages(
        self,
        app_record: App,
        model_config: ModelConfigWithCredentialsEntity,
        prompt_template_entity: PromptTemplateEntity,
        inputs: Mapping[str, str],
        files: Sequence["File"],
        query: Optional[str] = None,
        context: Optional[str] = None,
        memory: Optional[TokenBufferMemory] = None,
    ) -> tuple[list[PromptMessage], Optional[list[str]]]:
        """
        Organize prompt messages
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :param context: context string to fill into the prompt template
        :param memory: memory
        :return: prompt messages and optional stop words
        """
        # get prompt without memory and context
        if prompt_template_entity.prompt_type == PromptTemplateEntity.PromptType.SIMPLE:
            prompt_transform: Union[SimplePromptTransform, AdvancedPromptTransform]
            prompt_transform = SimplePromptTransform()
            prompt_messages, stop = prompt_transform.get_prompt(
                app_mode=AppMode.value_of(app_record.mode),
                prompt_template_entity=prompt_template_entity,
                inputs=inputs,
                query=query or "",
                files=files,
                context=context,
                memory=memory,
                model_config=model_config,
            )
        else:
            memory_config = MemoryConfig(window=MemoryConfig.WindowConfig(enabled=False))

            model_mode = ModelMode.value_of(model_config.mode)
            prompt_template: Union[CompletionModelPromptTemplate, list[ChatModelMessage]]
            if model_mode == ModelMode.COMPLETION:
                advanced_completion_prompt_template = prompt_template_entity.advanced_completion_prompt_template
                if not advanced_completion_prompt_template:
                    raise InvokeBadRequestError("Advanced completion prompt template is required.")
                prompt_template = CompletionModelPromptTemplate(text=advanced_completion_prompt_template.prompt)

                if advanced_completion_prompt_template.role_prefix:
                    memory_config.role_prefix = MemoryConfig.RolePrefix(
                        user=advanced_completion_prompt_template.role_prefix.user,
                        assistant=advanced_completion_prompt_template.role_prefix.assistant,
                    )
            else:
                if not prompt_template_entity.advanced_chat_prompt_template:
                    raise InvokeBadRequestError("Advanced chat prompt template is required.")
                prompt_template = []
                for message in prompt_template_entity.advanced_chat_prompt_template.messages:
                    prompt_template.append(ChatModelMessage(text=message.text, role=message.role))

            prompt_transform = AdvancedPromptTransform()
            prompt_messages = prompt_transform.get_prompt(
                prompt_template=prompt_template,
                inputs=inputs,
                query=query or "",
                files=files,
                context=context,
                memory_config=memory_config,
                memory=memory,
                model_config=model_config,
            )
            stop = model_config.stop

        return prompt_messages, stop
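
    # Note: SIMPLE templates delegate stop-word derivation to SimplePromptTransform,
    # while advanced templates take their stop words directly from model_config.stop.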

    def direct_output(
        self,
        queue_manager: AppQueueManager,
        app_generate_entity: EasyUIBasedAppGenerateEntity,
        prompt_messages: list,
        text: str,
        stream: bool,
        usage: Optional[LLMUsage] = None,
    ) -> None:
        """
        Output the given text directly, without invoking the model.
        :param queue_manager: application queue manager
        :param app_generate_entity: app generate entity
        :param prompt_messages: prompt messages
        :param text: text
        :param stream: whether to stream the text character by character
        :param usage: usage
        :return:
        """
        if stream:
            index = 0
            for token in text:  # emit the text one character at a time
                chunk = LLMResultChunk(
                    model=app_generate_entity.model_conf.model,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(index=index, message=AssistantPromptMessage(content=token)),
                )
                queue_manager.publish(QueueLLMChunkEvent(chunk=chunk), PublishFrom.APPLICATION_MANAGER)
                index += 1
                time.sleep(0.01)

        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=LLMResult(
                    model=app_generate_entity.model_conf.model,
                    prompt_messages=prompt_messages,
                    message=AssistantPromptMessage(content=text),
                    usage=usage or LLMUsage.empty_usage(),
                ),
            ),
            PublishFrom.APPLICATION_MANAGER,
        )
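
    # Illustrative behavior: with stream=True and text="Hi", two QueueLLMChunkEvents
    # are published (one per character, roughly 10 ms apart), followed by a single
    # QueueMessageEndEvent carrying the full text. With stream=False, only the
    # end event is published.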

    def _handle_invoke_result(
        self,
        invoke_result: Union[LLMResult, Generator[Any, None, None]],
        queue_manager: AppQueueManager,
        stream: bool,
        agent: bool = False,
    ) -> None:
        """
        Handle invoke result
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param stream: stream
        :param agent: agent
        :return:
        """
        if not stream and isinstance(invoke_result, LLMResult):
            self._handle_invoke_result_direct(invoke_result=invoke_result, queue_manager=queue_manager, agent=agent)
        elif stream and isinstance(invoke_result, Generator):
            self._handle_invoke_result_stream(invoke_result=invoke_result, queue_manager=queue_manager, agent=agent)
        else:
            raise NotImplementedError(f"unsupported invoke result type: {type(invoke_result)}")

    def _handle_invoke_result_direct(
        self, invoke_result: LLMResult, queue_manager: AppQueueManager, agent: bool
    ) -> None:
        """
        Handle a blocking (non-streaming) invoke result
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: agent
        :return:
        """
        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=invoke_result,
            ),
            PublishFrom.APPLICATION_MANAGER,
        )

    def _handle_invoke_result_stream(
        self, invoke_result: Generator, queue_manager: AppQueueManager, agent: bool
    ) -> None:
        """
        Handle a streaming invoke result
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: agent
        :return:
        """
        model: str = ""
        prompt_messages: list[PromptMessage] = []
        text = ""
        usage = None
        for result in invoke_result:
            if not agent:
                queue_manager.publish(QueueLLMChunkEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)
            else:
                queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)

            text += result.delta.message.content

            if not model:
                model = result.model

            if not prompt_messages:
                prompt_messages = result.prompt_messages

            if result.delta.usage:
                usage = result.delta.usage

        if not usage:
            usage = LLMUsage.empty_usage()

        llm_result = LLMResult(
            model=model, prompt_messages=prompt_messages, message=AssistantPromptMessage(content=text), usage=usage
        )

        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=llm_result,
            ),
            PublishFrom.APPLICATION_MANAGER,
        )
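
    # Note: each streamed chunk is re-published as a QueueLLMChunkEvent (or a
    # QueueAgentMessageEvent in agent mode) while model, prompt messages, text,
    # and usage are accumulated, so the final QueueMessageEndEvent carries one
    # fully assembled LLMResult.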

    def moderation_for_inputs(
        self,
        *,
        app_id: str,
        tenant_id: str,
        app_generate_entity: AppGenerateEntity,
        inputs: Mapping[str, Any],
        query: str | None = None,
        message_id: str,
    ) -> tuple[bool, Mapping[str, Any], str]:
        """
        Process sensitive_word_avoidance.
        :param app_id: app id
        :param tenant_id: tenant id
        :param app_generate_entity: app generate entity
        :param inputs: inputs
        :param query: query
        :param message_id: message id
        :return: a (flagged, inputs, query) tuple from the moderation check
        """
        moderation_feature = InputModeration()
        return moderation_feature.check(
            app_id=app_id,
            tenant_id=tenant_id,
            app_config=app_generate_entity.app_config,
            inputs=dict(inputs),
            query=query or "",
            message_id=message_id,
            trace_manager=app_generate_entity.trace_manager,
        )
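
    # Presumably (inferred from the return annotation, not stated in this file):
    # the boolean flags whether moderation tripped, and the returned inputs/query
    # may have been rewritten by the configured moderation rules.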

    def check_hosting_moderation(
        self,
        application_generate_entity: EasyUIBasedAppGenerateEntity,
        queue_manager: AppQueueManager,
        prompt_messages: list[PromptMessage],
    ) -> bool:
        """
        Check hosting moderation
        :param application_generate_entity: application generate entity
        :param queue_manager: queue manager
        :param prompt_messages: prompt messages
        :return: True if the content was flagged and a canned reply was emitted
        """
        hosting_moderation_feature = HostingModerationFeature()
        moderation_result = hosting_moderation_feature.check(
            application_generate_entity=application_generate_entity, prompt_messages=prompt_messages
        )

        if moderation_result:
            self.direct_output(
                queue_manager=queue_manager,
                app_generate_entity=application_generate_entity,
                prompt_messages=prompt_messages,
                text="I apologize for any confusion, but I'm an AI assistant to be helpful, harmless, and honest.",
                stream=application_generate_entity.stream,
            )

        return moderation_result

    def fill_in_inputs_from_external_data_tools(
        self,
        tenant_id: str,
        app_id: str,
        external_data_tools: list[ExternalDataVariableEntity],
        inputs: Mapping[str, Any],
        query: str,
    ) -> Mapping[str, Any]:
        """
        Fill in variable inputs from external data tools, if any are configured.
        :param tenant_id: workspace id
        :param app_id: app id
        :param external_data_tools: external data tools configs
        :param inputs: the inputs
        :param query: the query
        :return: the filled inputs
        """
        external_data_fetch_feature = ExternalDataFetch()
        return external_data_fetch_feature.fetch(
            tenant_id=tenant_id, app_id=app_id, external_data_tools=external_data_tools, inputs=inputs, query=query
        )

    def query_app_annotations_to_reply(
        self, app_record: App, message: Message, query: str, user_id: str, invoke_from: InvokeFrom
    ) -> Optional[MessageAnnotation]:
        """
        Query app annotations to reply
        :param app_record: app record
        :param message: message
        :param query: query
        :param user_id: user id
        :param invoke_from: invoke from
        :return: the matching annotation, if any
        """
        annotation_reply_feature = AnnotationReplyFeature()
        return annotation_reply_feature.query(
            app_record=app_record, message=message, query=query, user_id=user_id, invoke_from=invoke_from
        )
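

# Illustrative sketch (not part of the original module): how a hypothetical
# subclass might wire these helpers together. `MyAppRunner`, `run`, and the
# `model_instance.invoke_llm(...)` call are assumptions for illustration only.
#
#     class MyAppRunner(AppRunner):
#         def run(self, application_generate_entity, queue_manager):
#             model_config = application_generate_entity.model_conf
#             prompt_messages, stop = self.organize_prompt_messages(...)
#             # Shrink max_tokens so prompt + completion fits the context window.
#             self.recalc_llm_max_tokens(model_config, prompt_messages)
#             invoke_result = model_instance.invoke_llm(
#                 prompt_messages=prompt_messages,
#                 stop=stop,
#                 stream=application_generate_entity.stream,
#             )
#             self._handle_invoke_result(
#                 invoke_result=invoke_result,
#                 queue_manager=queue_manager,
#                 stream=application_generate_entity.stream,
#             )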