# model.py
  1. from collections.abc import Generator
  2. from core.model_manager import ModelManager
  3. from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
  4. from core.plugin.backwards_invocation.base import BaseBackwardsInvocation
  5. from core.plugin.entities.request import RequestInvokeLLM
  6. from core.workflow.nodes.llm.llm_node import LLMNode
  7. from models.account import Tenant
  8. class PluginModelBackwardsInvocation(BaseBackwardsInvocation):
  9. @classmethod
  10. def invoke_llm(
  11. cls, user_id: str, tenant: Tenant, payload: RequestInvokeLLM
  12. ) -> Generator[LLMResultChunk, None, None] | LLMResult:
  13. """
  14. invoke llm
  15. """
  16. model_instance = ModelManager().get_model_instance(
  17. tenant_id=tenant.id,
  18. provider=payload.provider,
  19. model_type=payload.model_type,
  20. model=payload.model,
  21. )
  22. # invoke model
  23. response = model_instance.invoke_llm(
  24. prompt_messages=payload.prompt_messages,
  25. model_parameters=payload.model_parameters,
  26. tools=payload.tools,
  27. stop=payload.stop,
  28. stream=payload.stream or True,
  29. user=user_id,
  30. )
  31. if isinstance(response, Generator):
  32. def handle() -> Generator[LLMResultChunk, None, None]:
  33. for chunk in response:
  34. if chunk.delta.usage:
  35. LLMNode.deduct_llm_quota(
  36. tenant_id=tenant.id, model_instance=model_instance, usage=chunk.delta.usage
  37. )
  38. yield chunk
  39. return handle()
  40. else:
  41. if response.usage:
  42. LLMNode.deduct_llm_quota(tenant_id=tenant.id, model_instance=model_instance, usage=response.usage)
  43. return response