base_app_runner.py

import time
from collections.abc import Generator
from typing import TYPE_CHECKING, Optional, Union

from core.app.app_config.entities import ExternalDataVariableEntity, PromptTemplateEntity
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
from core.app.entities.app_invoke_entities import (
    AppGenerateEntity,
    EasyUIBasedAppGenerateEntity,
    InvokeFrom,
    ModelConfigWithCredentialsEntity,
)
from core.app.entities.queue_entities import QueueAgentMessageEvent, QueueLLMChunkEvent, QueueMessageEndEvent
from core.app.features.annotation_reply.annotation_reply import AnnotationReplyFeature
from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature
from core.external_data_tool.external_data_fetch import ExternalDataFetch
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.moderation.input_moderation import InputModeration
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig
from core.prompt.simple_prompt_transform import ModelMode, SimplePromptTransform
from models.model import App, AppMode, Message, MessageAnnotation

if TYPE_CHECKING:
    from core.file.file_obj import FileVar


class AppRunner:
    def get_pre_calculate_rest_tokens(self, app_record: App,
                                      model_config: ModelConfigWithCredentialsEntity,
                                      prompt_template_entity: PromptTemplateEntity,
                                      inputs: dict[str, str],
                                      files: list["FileVar"],
                                      query: Optional[str] = None) -> int:
        """
        Pre-calculate the number of tokens left for context after the prompt
        and max_tokens are accounted for.
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :return: rest tokens, or -1 if the model declares no context size
        """
        # Invoke model
        model_instance = ModelInstance(
            provider_model_bundle=model_config.provider_model_bundle,
            model=model_config.model
        )

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if (parameter_rule.name == 'max_tokens'
                    or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                max_tokens = (model_config.parameters.get(parameter_rule.name)
                              or model_config.parameters.get(parameter_rule.use_template)) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        # get prompt messages without memory and context
        prompt_messages, stop = self.organize_prompt_messages(
            app_record=app_record,
            model_config=model_config,
            prompt_template_entity=prompt_template_entity,
            inputs=inputs,
            files=files,
            query=query
        )

        prompt_tokens = model_instance.get_llm_num_tokens(
            prompt_messages
        )

        rest_tokens = model_context_tokens - max_tokens - prompt_tokens
        if rest_tokens < 0:
            raise InvokeBadRequestError("Query or prefix prompt is too long. You can reduce the prefix prompt, "
                                        "shrink max_tokens, or switch to an LLM with a larger context size.")

        return rest_tokens
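
    # Worked example (hypothetical numbers): with a 16,384-token context window,
    # max_tokens = 1,024, and a prompt that tokenizes to 2,000 tokens, the budget
    # left for retrieved context is 16,384 - 1,024 - 2,000 = 13,360 tokens; a
    # negative result raises InvokeBadRequestError instead.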

    def recalc_llm_max_tokens(self, model_config: ModelConfigWithCredentialsEntity,
                              prompt_messages: list[PromptMessage]):
        # Recalculate max_tokens if prompt_tokens + max_tokens exceeds the model's context limit
        model_instance = ModelInstance(
            provider_model_bundle=model_config.provider_model_bundle,
            model=model_config.model
        )

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if (parameter_rule.name == 'max_tokens'
                    or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                max_tokens = (model_config.parameters.get(parameter_rule.name)
                              or model_config.parameters.get(parameter_rule.use_template)) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        prompt_tokens = model_instance.get_llm_num_tokens(
            prompt_messages
        )

        if prompt_tokens + max_tokens > model_context_tokens:
            max_tokens = max(model_context_tokens - prompt_tokens, 16)

            for parameter_rule in model_config.model_schema.parameter_rules:
                if (parameter_rule.name == 'max_tokens'
                        or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                    model_config.parameters[parameter_rule.name] = max_tokens
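
    # Worked example (hypothetical numbers): with a 4,096-token context window,
    # max_tokens = 512, and a 4,000-token prompt, 4,000 + 512 > 4,096, so
    # max_tokens is clamped to max(4,096 - 4,000, 16) = 96 and written back into
    # model_config.parameters before the model is invoked.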

    def organize_prompt_messages(self, app_record: App,
                                 model_config: ModelConfigWithCredentialsEntity,
                                 prompt_template_entity: PromptTemplateEntity,
                                 inputs: dict[str, str],
                                 files: list["FileVar"],
                                 query: Optional[str] = None,
                                 context: Optional[str] = None,
                                 memory: Optional[TokenBufferMemory] = None) \
            -> tuple[list[PromptMessage], Optional[list[str]]]:
        """
        Organize prompt messages
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :param context: context
        :param memory: memory
        :return:
        """
        # get prompt without memory and context
        if prompt_template_entity.prompt_type == PromptTemplateEntity.PromptType.SIMPLE:
            prompt_transform = SimplePromptTransform()
            prompt_messages, stop = prompt_transform.get_prompt(
                app_mode=AppMode.value_of(app_record.mode),
                prompt_template_entity=prompt_template_entity,
                inputs=inputs,
                query=query if query else '',
                files=files,
                context=context,
                memory=memory,
                model_config=model_config
            )
        else:
            memory_config = MemoryConfig(
                window=MemoryConfig.WindowConfig(
                    enabled=False
                )
            )

            model_mode = ModelMode.value_of(model_config.mode)
            if model_mode == ModelMode.COMPLETION:
                advanced_completion_prompt_template = prompt_template_entity.advanced_completion_prompt_template
                prompt_template = CompletionModelPromptTemplate(
                    text=advanced_completion_prompt_template.prompt
                )

                if advanced_completion_prompt_template.role_prefix:
                    memory_config.role_prefix = MemoryConfig.RolePrefix(
                        user=advanced_completion_prompt_template.role_prefix.user,
                        assistant=advanced_completion_prompt_template.role_prefix.assistant
                    )
            else:
                prompt_template = []
                for message in prompt_template_entity.advanced_chat_prompt_template.messages:
                    prompt_template.append(ChatModelMessage(
                        text=message.text,
                        role=message.role
                    ))

            prompt_transform = AdvancedPromptTransform()
            prompt_messages = prompt_transform.get_prompt(
                prompt_template=prompt_template,
                inputs=inputs,
                query=query if query else '',
                files=files,
                context=context,
                memory_config=memory_config,
                memory=memory,
                model_config=model_config
            )
            stop = model_config.stop

        return prompt_messages, stop
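
    # Usage sketch (hypothetical app/config objects, not part of this module):
    #
    #     prompt_messages, stop = runner.organize_prompt_messages(
    #         app_record=app, model_config=model_config,
    #         prompt_template_entity=template, inputs={"name": "Alice"},
    #         files=[], query="Hello")
    #
    # A SIMPLE prompt type takes the SimplePromptTransform path; any other type
    # goes through AdvancedPromptTransform with an explicit MemoryConfig.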

    def direct_output(self, queue_manager: AppQueueManager,
                      app_generate_entity: EasyUIBasedAppGenerateEntity,
                      prompt_messages: list,
                      text: str,
                      stream: bool,
                      usage: Optional[LLMUsage] = None) -> None:
        """
        Direct output
        :param queue_manager: application queue manager
        :param app_generate_entity: app generate entity
        :param prompt_messages: prompt messages
        :param text: text
        :param stream: stream
        :param usage: usage
        :return:
        """
        if stream:
            index = 0
            for token in text:
                chunk = LLMResultChunk(
                    model=app_generate_entity.model_conf.model,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(
                        index=index,
                        message=AssistantPromptMessage(content=token)
                    )
                )

                queue_manager.publish(
                    QueueLLMChunkEvent(
                        chunk=chunk
                    ), PublishFrom.APPLICATION_MANAGER
                )
                index += 1
                time.sleep(0.01)

        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=LLMResult(
                    model=app_generate_entity.model_conf.model,
                    prompt_messages=prompt_messages,
                    message=AssistantPromptMessage(content=text),
                    usage=usage if usage else LLMUsage.empty_usage()
                ),
            ), PublishFrom.APPLICATION_MANAGER
        )
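
    # Example (hypothetical call): direct_output(..., text="Hi", stream=True)
    # publishes two QueueLLMChunkEvents ("H" at index 0, "i" at index 1, one
    # character per chunk with a 10 ms pause between them), then a single
    # QueueMessageEndEvent carrying the full text; with stream=False only the
    # QueueMessageEndEvent is published.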

    def _handle_invoke_result(self, invoke_result: Union[LLMResult, Generator],
                              queue_manager: AppQueueManager,
                              stream: bool,
                              agent: bool = False) -> None:
        """
        Handle invoke result
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param stream: stream
        :param agent: agent
        :return:
        """
        if not stream:
            self._handle_invoke_result_direct(
                invoke_result=invoke_result,
                queue_manager=queue_manager,
                agent=agent
            )
        else:
            self._handle_invoke_result_stream(
                invoke_result=invoke_result,
                queue_manager=queue_manager,
                agent=agent
            )

    def _handle_invoke_result_direct(self, invoke_result: LLMResult,
                                     queue_manager: AppQueueManager,
                                     agent: bool) -> None:
        """
        Handle invoke result direct
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: agent
        :return:
        """
        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=invoke_result,
            ), PublishFrom.APPLICATION_MANAGER
        )

    def _handle_invoke_result_stream(self, invoke_result: Generator,
                                     queue_manager: AppQueueManager,
                                     agent: bool) -> None:
        """
        Handle invoke result as a stream
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: agent
        :return:
        """
        model = None
        prompt_messages = []
        text = ''
        usage = None
        for result in invoke_result:
            if not agent:
                queue_manager.publish(
                    QueueLLMChunkEvent(
                        chunk=result
                    ), PublishFrom.APPLICATION_MANAGER
                )
            else:
                queue_manager.publish(
                    QueueAgentMessageEvent(
                        chunk=result
                    ), PublishFrom.APPLICATION_MANAGER
                )

            text += result.delta.message.content

            if not model:
                model = result.model

            if not prompt_messages:
                prompt_messages = result.prompt_messages

            if not usage and result.delta.usage:
                usage = result.delta.usage

        if not usage:
            usage = LLMUsage.empty_usage()

        llm_result = LLMResult(
            model=model,
            prompt_messages=prompt_messages,
            message=AssistantPromptMessage(content=text),
            usage=usage
        )

        queue_manager.publish(
            QueueMessageEndEvent(
                llm_result=llm_result,
            ), PublishFrom.APPLICATION_MANAGER
        )
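
    # Example (hypothetical stream): for chunks whose deltas carry "Hel" and
    # "lo", each chunk is forwarded as a QueueLLMChunkEvent (or
    # QueueAgentMessageEvent when agent=True), the deltas are concatenated into
    # text="Hello", and one QueueMessageEndEvent with the aggregated LLMResult
    # closes the stream.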

    def moderation_for_inputs(
        self, app_id: str,
        tenant_id: str,
        app_generate_entity: AppGenerateEntity,
        inputs: dict,
        query: str,
        message_id: str,
    ) -> tuple[bool, dict, str]:
        """
        Process sensitive_word_avoidance.
        :param app_id: app id
        :param tenant_id: tenant id
        :param app_generate_entity: app generate entity
        :param inputs: inputs
        :param query: query
        :param message_id: message id
        :return:
        """
        moderation_feature = InputModeration()
        return moderation_feature.check(
            app_id=app_id,
            tenant_id=tenant_id,
            app_config=app_generate_entity.app_config,
            inputs=inputs,
            query=query if query else '',
            message_id=message_id,
            trace_manager=app_generate_entity.trace_manager
        )

    def check_hosting_moderation(self, application_generate_entity: EasyUIBasedAppGenerateEntity,
                                 queue_manager: AppQueueManager,
                                 prompt_messages: list[PromptMessage]) -> bool:
        """
        Check hosting moderation
        :param application_generate_entity: application generate entity
        :param queue_manager: queue manager
        :param prompt_messages: prompt messages
        :return:
        """
        hosting_moderation_feature = HostingModerationFeature()
        moderation_result = hosting_moderation_feature.check(
            application_generate_entity=application_generate_entity,
            prompt_messages=prompt_messages
        )

        if moderation_result:
            self.direct_output(
                queue_manager=queue_manager,
                app_generate_entity=application_generate_entity,
                prompt_messages=prompt_messages,
                text="I apologize for any confusion, "
                     "but I'm an AI assistant designed to be helpful, harmless, and honest.",
                stream=application_generate_entity.stream
            )

        return moderation_result
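
    # Note: when hosting moderation flags the prompt, a canned refusal is
    # streamed to the client via direct_output() and True is returned, so the
    # caller can skip the real model invocation.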

    def fill_in_inputs_from_external_data_tools(self, tenant_id: str,
                                                app_id: str,
                                                external_data_tools: list[ExternalDataVariableEntity],
                                                inputs: dict,
                                                query: str) -> dict:
        """
        Fill in variable inputs from external data tools, if any are configured.
        :param tenant_id: workspace id
        :param app_id: app id
        :param external_data_tools: external data tools configs
        :param inputs: the inputs
        :param query: the query
        :return: the filled inputs
        """
        external_data_fetch_feature = ExternalDataFetch()
        return external_data_fetch_feature.fetch(
            tenant_id=tenant_id,
            app_id=app_id,
            external_data_tools=external_data_tools,
            inputs=inputs,
            query=query
        )

    def query_app_annotations_to_reply(self, app_record: App,
                                       message: Message,
                                       query: str,
                                       user_id: str,
                                       invoke_from: InvokeFrom) -> Optional[MessageAnnotation]:
        """
        Query app annotations to reply
        :param app_record: app record
        :param message: message
        :param query: query
        :param user_id: user id
        :param invoke_from: invoke from
        :return:
        """
        annotation_reply_feature = AnnotationReplyFeature()
        return annotation_reply_feature.query(
            app_record=app_record,
            message=message,
            query=query,
            user_id=user_id,
            invoke_from=invoke_from
        )
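
# A minimal usage sketch (assumptions, not part of this module): concrete app
# runners subclass AppRunner and wire these helpers together. The subclass name,
# method signature, and the model_instance variable below are hypothetical.
#
#     class MyAppRunner(AppRunner):
#         def run(self, application_generate_entity, queue_manager, message):
#             prompt_messages, stop = self.organize_prompt_messages(...)
#             self.recalc_llm_max_tokens(application_generate_entity.model_conf,
#                                        prompt_messages)
#             invoke_result = model_instance.invoke_llm(
#                 prompt_messages=prompt_messages, stop=stop, stream=True)
#             self._handle_invoke_result(invoke_result, queue_manager, stream=True)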