# base_app_runner.py

import time
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Optional, Union

from core.app.app_config.entities import ExternalDataVariableEntity, PromptTemplateEntity
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.entities.app_invoke_entities import (
    AppGenerateEntity,
    EasyUIBasedAppGenerateEntity,
    InvokeFrom,
    ModelConfigWithCredentialsEntity,
)
from core.app.entities.queue_entities import QueueAgentMessageEvent, QueueLLMChunkEvent, QueueMessageEndEvent
from core.app.features.annotation_reply.annotation_reply import AnnotationReplyFeature
from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature
from core.external_data_tool.external_data_fetch import ExternalDataFetch
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    ImagePromptMessageContent,
    PromptMessage,
)
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.moderation.input_moderation import InputModeration
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
from core.prompt.simple_prompt_transform import ModelMode, SimplePromptTransform
from models.model import App, AppMode, Message, MessageAnnotation

if TYPE_CHECKING:
    from core.file.models import File


class AppRunner:
    def get_pre_calculate_rest_tokens(
        self,
        app_record: App,
        model_config: ModelConfigWithCredentialsEntity,
        prompt_template_entity: PromptTemplateEntity,
        inputs: Mapping[str, str],
        files: Sequence["File"],
        query: Optional[str] = None,
    ) -> int:
        """
        Pre-calculate the rest tokens available before invoking the model.
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :return: the remaining token budget, or -1 if the model declares no context size
        """
        # get a model instance for token counting
        model_instance = ModelInstance(
            provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
        )

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if parameter_rule.name == "max_tokens" or (
                parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
            ):
                max_tokens = (
                    model_config.parameters.get(parameter_rule.name)
                    or model_config.parameters.get(parameter_rule.use_template or "")
                ) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        # get prompt messages without memory and context
        prompt_messages, stop = self.organize_prompt_messages(
            app_record=app_record,
            model_config=model_config,
            prompt_template_entity=prompt_template_entity,
            inputs=inputs,
            files=files,
            query=query,
        )

        prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)

        rest_tokens: int = model_context_tokens - max_tokens - prompt_tokens
        if rest_tokens < 0:
            raise InvokeBadRequestError(
                "Query or prefix prompt is too long, you can reduce the prefix prompt, "
                "shrink the max tokens, or switch to an LLM with a larger token limit."
            )

        return rest_tokens
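
    # Worked example of the budget arithmetic above (illustrative numbers, not
    # from any real model schema): with an 8192-token context window,
    # max_tokens=1024, and a prompt that tokenizes to 2000 tokens,
    #     rest_tokens = 8192 - 1024 - 2000  # == 5168 tokens left for context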

    def recalc_llm_max_tokens(
        self, model_config: ModelConfigWithCredentialsEntity, prompt_messages: list[PromptMessage]
    ):
        # recalculate max_tokens if prompt_tokens + max_tokens exceeds the model's token limit
        model_instance = ModelInstance(
            provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
        )

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if parameter_rule.name == "max_tokens" or (
                parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
            ):
                max_tokens = (
                    model_config.parameters.get(parameter_rule.name)
                    or model_config.parameters.get(parameter_rule.use_template or "")
                ) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)

        if prompt_tokens + max_tokens > model_context_tokens:
            max_tokens = max(model_context_tokens - prompt_tokens, 16)

            for parameter_rule in model_config.model_schema.parameter_rules:
                if parameter_rule.name == "max_tokens" or (
                    parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
                ):
                    model_config.parameters[parameter_rule.name] = max_tokens
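
    # Sketch of the clamp above (illustrative numbers): if the prompt already
    # occupies 8100 tokens of an 8192-token window and max_tokens was 1024, the
    # completion room is recomputed as max(8192 - 8100, 16) == 92, so at least
    # a 16-token floor is always requested even when the prompt fills the window.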

    def organize_prompt_messages(
        self,
        app_record: App,
        model_config: ModelConfigWithCredentialsEntity,
        prompt_template_entity: PromptTemplateEntity,
        inputs: Mapping[str, str],
        files: Sequence["File"],
        query: Optional[str] = None,
        context: Optional[str] = None,
        memory: Optional[TokenBufferMemory] = None,
        image_detail_config: Optional[ImagePromptMessageContent.DETAIL] = None,
    ) -> tuple[list[PromptMessage], Optional[list[str]]]:
        """
        Organize prompt messages
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :param context: context
        :param memory: memory
        :param image_detail_config: image detail config
        :return: the prompt messages and the optional stop sequences
        """
        # get prompt without memory and context
        if prompt_template_entity.prompt_type == PromptTemplateEntity.PromptType.SIMPLE:
            prompt_transform: Union[SimplePromptTransform, AdvancedPromptTransform]
            prompt_transform = SimplePromptTransform()
            prompt_messages, stop = prompt_transform.get_prompt(
                app_mode=AppMode.value_of(app_record.mode),
                prompt_template_entity=prompt_template_entity,
                inputs=inputs,
                query=query or "",
                files=files,
                context=context,
                memory=memory,
                model_config=model_config,
                image_detail_config=image_detail_config,
            )
        else:
            memory_config = MemoryConfig(window=MemoryConfig.WindowConfig(enabled=False))

            model_mode = ModelMode.value_of(model_config.mode)
            prompt_template: Union[CompletionModelPromptTemplate, list[ChatModelMessage]]
            if model_mode == ModelMode.COMPLETION:
                advanced_completion_prompt_template = prompt_template_entity.advanced_completion_prompt_template
                if not advanced_completion_prompt_template:
                    raise InvokeBadRequestError("Advanced completion prompt template is required.")
                prompt_template = CompletionModelPromptTemplate(text=advanced_completion_prompt_template.prompt)

                if advanced_completion_prompt_template.role_prefix:
                    memory_config.role_prefix = MemoryConfig.RolePrefix(
                        user=advanced_completion_prompt_template.role_prefix.user,
                        assistant=advanced_completion_prompt_template.role_prefix.assistant,
                    )
            else:
                if not prompt_template_entity.advanced_chat_prompt_template:
                    raise InvokeBadRequestError("Advanced chat prompt template is required.")
                prompt_template = []
                for message in prompt_template_entity.advanced_chat_prompt_template.messages:
                    prompt_template.append(ChatModelMessage(text=message.text, role=message.role))

            prompt_transform = AdvancedPromptTransform()
            prompt_messages = prompt_transform.get_prompt(
                prompt_template=prompt_template,
                inputs=inputs,
                query=query or "",
                files=files,
                context=context,
                memory_config=memory_config,
                memory=memory,
                model_config=model_config,
                image_detail_config=image_detail_config,
            )
            stop = model_config.stop

        return prompt_messages, stop
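
    # The advanced branch above builds one of two template shapes before calling
    # AdvancedPromptTransform.get_prompt(): completion models get a single
    # CompletionModelPromptTemplate wrapping the raw prompt text (plus optional
    # user/assistant role prefixes so memory can be rendered into the prompt),
    # while chat models get a list of ChatModelMessage entries mirroring the
    # configured message list.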

    def direct_output(
        self,
        queue_manager: AppQueueManager,
        app_generate_entity: EasyUIBasedAppGenerateEntity,
        prompt_messages: list,
        text: str,
        stream: bool,
        usage: Optional[LLMUsage] = None,
    ) -> None:
        """
        Direct output
        :param queue_manager: application queue manager
        :param app_generate_entity: app generate entity
        :param prompt_messages: prompt messages
        :param text: text
        :param stream: stream
        :param usage: usage
        :return:
        """
        if stream:
            index = 0
            for token in text:
                chunk = LLMResultChunk(
                    model=app_generate_entity.model_conf.model,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(index=index, message=AssistantPromptMessage(content=token)),
                )
                queue_manager.publish(QueueLLMChunkEvent(chunk=chunk), PublishFrom.APPLICATION_MANAGER)
                index += 1
                time.sleep(0.01)

        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=LLMResult(
                    model=app_generate_entity.model_conf.model,
                    prompt_messages=prompt_messages,
                    message=AssistantPromptMessage(content=text),
                    usage=usage or LLMUsage.empty_usage(),
                ),
            ),
            PublishFrom.APPLICATION_MANAGER,
        )
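
    # Note on the streaming branch of direct_output: the pre-computed text is
    # re-emitted one character at a time with a ~10 ms pause (time.sleep(0.01))
    # so queue consumers see the same chunked interface as a genuine model
    # stream, followed by the usual QueueMessageEndEvent with the full result.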

    def _handle_invoke_result(
        self,
        invoke_result: Union[LLMResult, Generator[Any, None, None]],
        queue_manager: AppQueueManager,
        stream: bool,
        agent: bool = False,
    ) -> None:
        """
        Handle invoke result
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param stream: stream
        :param agent: agent
        :return:
        """
        if not stream and isinstance(invoke_result, LLMResult):
            self._handle_invoke_result_direct(invoke_result=invoke_result, queue_manager=queue_manager, agent=agent)
        elif stream and isinstance(invoke_result, Generator):
            self._handle_invoke_result_stream(invoke_result=invoke_result, queue_manager=queue_manager, agent=agent)
        else:
            raise NotImplementedError(f"unsupported invoke result type: {type(invoke_result)}")

    def _handle_invoke_result_direct(
        self, invoke_result: LLMResult, queue_manager: AppQueueManager, agent: bool
    ) -> None:
        """
        Handle a direct (non-streaming) invoke result
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: agent
        :return:
        """
        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=invoke_result,
            ),
            PublishFrom.APPLICATION_MANAGER,
        )

    def _handle_invoke_result_stream(
        self, invoke_result: Generator, queue_manager: AppQueueManager, agent: bool
    ) -> None:
        """
        Handle a streaming invoke result
        :param invoke_result: invoke result generator
        :param queue_manager: application queue manager
        :param agent: agent
        :return:
        """
        model: str = ""
        prompt_messages: list[PromptMessage] = []
        text = ""
        usage = None
        for result in invoke_result:
            if not agent:
                queue_manager.publish(QueueLLMChunkEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)
            else:
                queue_manager.publish(QueueAgentMessageEvent(chunk=result), PublishFrom.APPLICATION_MANAGER)

            text += result.delta.message.content

            if not model:
                model = result.model

            if not prompt_messages:
                prompt_messages = result.prompt_messages

            if result.delta.usage:
                usage = result.delta.usage

        if not usage:
            usage = LLMUsage.empty_usage()

        llm_result = LLMResult(
            model=model, prompt_messages=prompt_messages, message=AssistantPromptMessage(content=text), usage=usage
        )

        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=llm_result,
            ),
            PublishFrom.APPLICATION_MANAGER,
        )
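
    # _handle_invoke_result_stream folds the chunk stream back into one
    # LLMResult: model and prompt_messages are captured from the first chunk
    # that carries them, text concatenates every delta, and usage is taken from
    # whichever chunk reports it, falling back to LLMUsage.empty_usage().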

    def moderation_for_inputs(
        self,
        *,
        app_id: str,
        tenant_id: str,
        app_generate_entity: AppGenerateEntity,
        inputs: Mapping[str, Any],
        query: str | None = None,
        message_id: str,
    ) -> tuple[bool, Mapping[str, Any], str]:
        """
        Process sensitive_word_avoidance.
        :param app_id: app id
        :param tenant_id: tenant id
        :param app_generate_entity: app generate entity
        :param inputs: inputs
        :param query: query
        :param message_id: message id
        :return:
        """
        moderation_feature = InputModeration()
        return moderation_feature.check(
            app_id=app_id,
            tenant_id=tenant_id,
            app_config=app_generate_entity.app_config,
            inputs=dict(inputs),
            query=query or "",
            message_id=message_id,
            trace_manager=app_generate_entity.trace_manager,
        )
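
    # Judging from the return annotation, InputModeration.check() yields a
    # (flagged, inputs, query) tuple; presumably the caller stops normal
    # generation when the first element is True and otherwise continues with
    # the possibly rewritten inputs and query.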

    def check_hosting_moderation(
        self,
        application_generate_entity: EasyUIBasedAppGenerateEntity,
        queue_manager: AppQueueManager,
        prompt_messages: list[PromptMessage],
    ) -> bool:
        """
        Check hosting moderation
        :param application_generate_entity: application generate entity
        :param queue_manager: queue manager
        :param prompt_messages: prompt messages
        :return: whether hosting moderation flagged the prompt
        """
        hosting_moderation_feature = HostingModerationFeature()
        moderation_result = hosting_moderation_feature.check(
            application_generate_entity=application_generate_entity, prompt_messages=prompt_messages
        )

        if moderation_result:
            self.direct_output(
                queue_manager=queue_manager,
                app_generate_entity=application_generate_entity,
                prompt_messages=prompt_messages,
                text="I apologize for any confusion, but I'm an AI assistant designed to be helpful, harmless, and honest.",
                stream=application_generate_entity.stream,
            )

        return moderation_result
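
    # When hosting moderation flags the prompt, the canned refusal above is
    # emitted through direct_output (streamed or not, matching the request);
    # a True return value presumably signals the caller to stop generation.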

    def fill_in_inputs_from_external_data_tools(
        self,
        tenant_id: str,
        app_id: str,
        external_data_tools: list[ExternalDataVariableEntity],
        inputs: Mapping[str, Any],
        query: str,
    ) -> Mapping[str, Any]:
        """
        Fill in variable inputs from external data tools if they exist.
        :param tenant_id: workspace id
        :param app_id: app id
        :param external_data_tools: external data tools configs
        :param inputs: the inputs
        :param query: the query
        :return: the filled inputs
        """
        external_data_fetch_feature = ExternalDataFetch()
        return external_data_fetch_feature.fetch(
            tenant_id=tenant_id, app_id=app_id, external_data_tools=external_data_tools, inputs=inputs, query=query
        )

    def query_app_annotations_to_reply(
        self, app_record: App, message: Message, query: str, user_id: str, invoke_from: InvokeFrom
    ) -> Optional[MessageAnnotation]:
        """
        Query app annotations to reply
        :param app_record: app record
        :param message: message
        :param query: query
        :param user_id: user id
        :param invoke_from: invoke from
        :return:
        """
        annotation_reply_feature = AnnotationReplyFeature()
        return annotation_reply_feature.query(
            app_record=app_record, message=message, query=query, user_id=user_id, invoke_from=invoke_from
        )