model.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. from collections.abc import Generator
  2. from core.model_manager import ModelManager
  3. from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
  4. from core.plugin.entities.request import RequestInvokeLLM
  5. from core.workflow.nodes.llm.llm_node import LLMNode
  6. from models.account import Tenant
  class PluginBackwardsInvocation:
      """Invoke host-side models in response to a plugin ``RequestInvokeLLM`` payload."""

      @classmethod
      def invoke_llm(
          cls, user_id: str, tenant: Tenant, payload: RequestInvokeLLM
      ) -> Generator[LLMResultChunk, None, None] | LLMResult:
          """
          Invoke an LLM for ``tenant`` and deduct quota for the usage it reports.

          :param user_id: forwarded to the model as the ``user`` argument
          :param tenant: tenant whose model instance is resolved and whose quota
              is deducted
          :param payload: request carrying the provider, model type, model name,
              prompt messages, model parameters, tools, stop words and stream flag
          :return: a generator of ``LLMResultChunk`` when the model returns a
              generator, otherwise the ``LLMResult`` it returned
          """
          model_instance = ModelManager().get_model_instance(
              tenant_id=tenant.id,
              provider=payload.provider,
              model_type=payload.model_type,
              model=payload.model,
          )

          # Stream by default, but only when the caller left the flag unset.
          # The previous ``payload.stream or True`` evaluated to True for every
          # value, so an explicit ``stream=False`` was silently ignored.
          stream = True if payload.stream is None else payload.stream

          # invoke model
          response = model_instance.invoke_llm(
              prompt_messages=payload.prompt_messages,
              model_parameters=payload.model_parameters,
              tools=payload.tools,
              stop=payload.stop,
              stream=stream,
              user=user_id,
          )

          if isinstance(response, Generator):

              def handle() -> Generator[LLMResultChunk, None, None]:
                  # Quota is deducted lazily, as the consumer iterates: each
                  # chunk that carries usage triggers a deduction before it is
                  # passed on.
                  for chunk in response:
                      if chunk.delta.usage:
                          LLMNode.deduct_llm_quota(
                              tenant_id=tenant.id,
                              model_instance=model_instance,
                              usage=chunk.delta.usage,
                          )
                      yield chunk

              return handle()

          # Non-generator result: deduct once from the usage on the result.
          if response.usage:
              LLMNode.deduct_llm_quota(
                  tenant_id=tenant.id,
                  model_instance=model_instance,
                  usage=response.usage,
              )
          return response