base_app_runner.py

import time
from collections.abc import Generator
from typing import Optional, Union

from core.app.app_config.entities import ExternalDataVariableEntity, PromptTemplateEntity
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.entities.app_invoke_entities import (
    AppGenerateEntity,
    EasyUIBasedAppGenerateEntity,
    InvokeFrom,
    ModelConfigWithCredentialsEntity,
)
from core.app.entities.queue_entities import QueueAgentMessageEvent, QueueLLMChunkEvent, QueueMessageEndEvent
from core.app.features.annotation_reply.annotation_reply import AnnotationReplyFeature
from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature
from core.external_data_tool.external_data_fetch import ExternalDataFetch
from core.file.file_obj import FileVar
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.moderation.input_moderation import InputModeration
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
from core.prompt.simple_prompt_transform import ModelMode, SimplePromptTransform
from models.model import App, AppMode, Message, MessageAnnotation


class AppRunner:
    def get_pre_calculate_rest_tokens(self, app_record: App,
                                      model_config: ModelConfigWithCredentialsEntity,
                                      prompt_template_entity: PromptTemplateEntity,
                                      inputs: dict[str, str],
                                      files: list[FileVar],
                                      query: Optional[str] = None) -> int:
        """
        Get pre-calculated rest tokens available for the completion
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :return: number of rest tokens, or -1 if the model has no context size
        """
        # get the model instance used for token counting
        model_instance = ModelInstance(
            provider_model_bundle=model_config.provider_model_bundle,
            model=model_config.model
        )

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if (parameter_rule.name == 'max_tokens'
                    or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                max_tokens = (model_config.parameters.get(parameter_rule.name)
                              or model_config.parameters.get(parameter_rule.use_template)) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        # get prompt messages without memory and context
        prompt_messages, stop = self.organize_prompt_messages(
            app_record=app_record,
            model_config=model_config,
            prompt_template_entity=prompt_template_entity,
            inputs=inputs,
            files=files,
            query=query
        )

        prompt_tokens = model_instance.get_llm_num_tokens(
            prompt_messages
        )

        rest_tokens = model_context_tokens - max_tokens - prompt_tokens
        if rest_tokens < 0:
            raise InvokeBadRequestError("Query or prefix prompt is too long, you can reduce the prefix prompt, "
                                        "or shrink the max_tokens, or switch to an LLM with a larger context size.")

        return rest_tokens

    def recalc_llm_max_tokens(self, model_config: ModelConfigWithCredentialsEntity,
                              prompt_messages: list[PromptMessage]):
        # recalculate max_tokens if prompt_tokens + max_tokens exceeds the model's context window
        model_instance = ModelInstance(
            provider_model_bundle=model_config.provider_model_bundle,
            model=model_config.model
        )

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if (parameter_rule.name == 'max_tokens'
                    or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                max_tokens = (model_config.parameters.get(parameter_rule.name)
                              or model_config.parameters.get(parameter_rule.use_template)) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        prompt_tokens = model_instance.get_llm_num_tokens(
            prompt_messages
        )

        if prompt_tokens + max_tokens > model_context_tokens:
            max_tokens = max(model_context_tokens - prompt_tokens, 16)

            for parameter_rule in model_config.model_schema.parameter_rules:
                if (parameter_rule.name == 'max_tokens'
                        or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                    model_config.parameters[parameter_rule.name] = max_tokens
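
    # Illustrative example of the recalculation above (numbers are hypothetical):
    # with a context window of 4096 tokens, a prompt of 4000 tokens and a configured
    # max_tokens of 1024, prompt_tokens + max_tokens (5024) exceeds the window, so
    # max_tokens is clamped to max(4096 - 4000, 16) = 96 before the model is invoked.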

    def organize_prompt_messages(self, app_record: App,
                                 model_config: ModelConfigWithCredentialsEntity,
                                 prompt_template_entity: PromptTemplateEntity,
                                 inputs: dict[str, str],
                                 files: list[FileVar],
                                 query: Optional[str] = None,
                                 context: Optional[str] = None,
                                 memory: Optional[TokenBufferMemory] = None) \
            -> tuple[list[PromptMessage], Optional[list[str]]]:
        """
        Organize prompt messages
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :param context: context
        :param memory: memory
        :return: prompt messages and stop words
        """
        # get prompt without memory and context
        if prompt_template_entity.prompt_type == PromptTemplateEntity.PromptType.SIMPLE:
            prompt_transform = SimplePromptTransform()
            prompt_messages, stop = prompt_transform.get_prompt(
                app_mode=AppMode.value_of(app_record.mode),
                prompt_template_entity=prompt_template_entity,
                inputs=inputs,
                query=query if query else '',
                files=files,
                context=context,
                memory=memory,
                model_config=model_config
            )
        else:
            memory_config = MemoryConfig(
                window=MemoryConfig.WindowConfig(
                    enabled=False
                )
            )

            model_mode = ModelMode.value_of(model_config.mode)
            if model_mode == ModelMode.COMPLETION:
                advanced_completion_prompt_template = prompt_template_entity.advanced_completion_prompt_template
                prompt_template = CompletionModelPromptTemplate(
                    text=advanced_completion_prompt_template.prompt
                )

                if advanced_completion_prompt_template.role_prefix:
                    memory_config.role_prefix = MemoryConfig.RolePrefix(
                        user=advanced_completion_prompt_template.role_prefix.user,
                        assistant=advanced_completion_prompt_template.role_prefix.assistant
                    )
            else:
                prompt_template = []
                for message in prompt_template_entity.advanced_chat_prompt_template.messages:
                    prompt_template.append(ChatModelMessage(
                        text=message.text,
                        role=message.role
                    ))

            prompt_transform = AdvancedPromptTransform()
            prompt_messages = prompt_transform.get_prompt(
                prompt_template=prompt_template,
                inputs=inputs,
                query=query if query else '',
                files=files,
                context=context,
                memory_config=memory_config,
                memory=memory,
                model_config=model_config
            )
            stop = model_config.stop

        return prompt_messages, stop

    def direct_output(self, queue_manager: AppQueueManager,
                      app_generate_entity: EasyUIBasedAppGenerateEntity,
                      prompt_messages: list,
                      text: str,
                      stream: bool,
                      usage: Optional[LLMUsage] = None) -> None:
        """
        Direct output
        :param queue_manager: application queue manager
        :param app_generate_entity: app generate entity
        :param prompt_messages: prompt messages
        :param text: text
        :param stream: stream
        :param usage: usage
        :return:
        """
        if stream:
            index = 0
            for token in text:
                chunk = LLMResultChunk(
                    model=app_generate_entity.model_conf.model,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(
                        index=index,
                        message=AssistantPromptMessage(content=token)
                    )
                )

                queue_manager.publish(
                    QueueLLMChunkEvent(
                        chunk=chunk
                    ), PublishFrom.APPLICATION_MANAGER
                )
                index += 1
                time.sleep(0.01)

        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=LLMResult(
                    model=app_generate_entity.model_conf.model,
                    prompt_messages=prompt_messages,
                    message=AssistantPromptMessage(content=text),
                    usage=usage if usage else LLMUsage.empty_usage()
                ),
            ), PublishFrom.APPLICATION_MANAGER
        )
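
    # Note (illustrative): with stream=True the text is emitted character by character
    # as QueueLLMChunkEvent chunks, with a 10 ms pause between them, before the final
    # QueueMessageEndEvent carrying the complete LLMResult is published; with
    # stream=False only the end event is published.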

    def _handle_invoke_result(self, invoke_result: Union[LLMResult, Generator],
                              queue_manager: AppQueueManager,
                              stream: bool,
                              agent: bool = False) -> None:
        """
        Handle invoke result
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param stream: stream
        :param agent: whether the result comes from an agent run
        :return:
        """
        if not stream:
            self._handle_invoke_result_direct(
                invoke_result=invoke_result,
                queue_manager=queue_manager,
                agent=agent
            )
        else:
            self._handle_invoke_result_stream(
                invoke_result=invoke_result,
                queue_manager=queue_manager,
                agent=agent
            )

    def _handle_invoke_result_direct(self, invoke_result: LLMResult,
                                     queue_manager: AppQueueManager,
                                     agent: bool) -> None:
        """
        Handle invoke result directly
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: whether the result comes from an agent run
        :return:
        """
        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=invoke_result,
            ), PublishFrom.APPLICATION_MANAGER
        )

    def _handle_invoke_result_stream(self, invoke_result: Generator,
                                     queue_manager: AppQueueManager,
                                     agent: bool) -> None:
        """
        Handle invoke result as a stream
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: whether the result comes from an agent run
        :return:
        """
        model = None
        prompt_messages = []
        text = ''
        usage = None
        for result in invoke_result:
            if not agent:
                queue_manager.publish(
                    QueueLLMChunkEvent(
                        chunk=result
                    ), PublishFrom.APPLICATION_MANAGER
                )
            else:
                queue_manager.publish(
                    QueueAgentMessageEvent(
                        chunk=result
                    ), PublishFrom.APPLICATION_MANAGER
                )

            text += result.delta.message.content

            if not model:
                model = result.model

            if not prompt_messages:
                prompt_messages = result.prompt_messages

            if not usage and result.delta.usage:
                usage = result.delta.usage

        if not usage:
            usage = LLMUsage.empty_usage()

        llm_result = LLMResult(
            model=model,
            prompt_messages=prompt_messages,
            message=AssistantPromptMessage(content=text),
            usage=usage
        )

        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=llm_result,
            ), PublishFrom.APPLICATION_MANAGER
        )

    def moderation_for_inputs(
        self, app_id: str,
        tenant_id: str,
        app_generate_entity: AppGenerateEntity,
        inputs: dict,
        query: str,
        message_id: str,
    ) -> tuple[bool, dict, str]:
        """
        Process sensitive_word_avoidance.
        :param app_id: app id
        :param tenant_id: tenant id
        :param app_generate_entity: app generate entity
        :param inputs: inputs
        :param query: query
        :param message_id: message id
        :return:
        """
        moderation_feature = InputModeration()
        return moderation_feature.check(
            app_id=app_id,
            tenant_id=tenant_id,
            app_config=app_generate_entity.app_config,
            inputs=inputs,
            query=query if query else '',
            message_id=message_id,
            trace_manager=app_generate_entity.trace_manager
        )

    def check_hosting_moderation(self, application_generate_entity: EasyUIBasedAppGenerateEntity,
                                 queue_manager: AppQueueManager,
                                 prompt_messages: list[PromptMessage]) -> bool:
        """
        Check hosting moderation
        :param application_generate_entity: application generate entity
        :param queue_manager: queue manager
        :param prompt_messages: prompt messages
        :return:
        """
        hosting_moderation_feature = HostingModerationFeature()
        moderation_result = hosting_moderation_feature.check(
            application_generate_entity=application_generate_entity,
            prompt_messages=prompt_messages
        )

        if moderation_result:
            self.direct_output(
                queue_manager=queue_manager,
                app_generate_entity=application_generate_entity,
                prompt_messages=prompt_messages,
                text="I apologize for any confusion, "
                     "but I'm an AI assistant designed to be helpful, harmless, and honest.",
                stream=application_generate_entity.stream
            )

        return moderation_result

    def fill_in_inputs_from_external_data_tools(self, tenant_id: str,
                                                app_id: str,
                                                external_data_tools: list[ExternalDataVariableEntity],
                                                inputs: dict,
                                                query: str) -> dict:
        """
        Fill in variable inputs from external data tools if they exist.
        :param tenant_id: workspace id
        :param app_id: app id
        :param external_data_tools: external data tools configs
        :param inputs: the inputs
        :param query: the query
        :return: the filled inputs
        """
        external_data_fetch_feature = ExternalDataFetch()
        return external_data_fetch_feature.fetch(
            tenant_id=tenant_id,
            app_id=app_id,
            external_data_tools=external_data_tools,
            inputs=inputs,
            query=query
        )

    def query_app_annotations_to_reply(self, app_record: App,
                                       message: Message,
                                       query: str,
                                       user_id: str,
                                       invoke_from: InvokeFrom) -> Optional[MessageAnnotation]:
        """
        Query app annotations to reply
        :param app_record: app record
        :param message: message
        :param query: query
        :param user_id: user id
        :param invoke_from: invoke from
        :return:
        """
        annotation_reply_feature = AnnotationReplyFeature()
        return annotation_reply_feature.query(
            app_record=app_record,
            message=message,
            query=query,
            user_id=user_id,
            invoke_from=invoke_from
        )