from collections.abc import Generator

from core.model_manager import ModelManager
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
from core.plugin.entities.request import RequestInvokeLLM
from core.workflow.nodes.llm.llm_node import LLMNode
from models.account import Tenant


class PluginBackwardsInvocation:
    @classmethod
    def invoke_llm(
        cls, user_id: str, tenant: Tenant, payload: RequestInvokeLLM
    ) -> Generator[LLMResultChunk, None, None] | LLMResult:
        """
        Invoke an LLM on behalf of a plugin, deducting quota from the
        tenant as usage is reported.
        """
        # Resolve the tenant's configured model instance for the requested
        # provider/model pair.
        model_instance = ModelManager().get_model_instance(
            tenant_id=tenant.id,
            provider=payload.provider,
            model_type=payload.model_type,
            model=payload.model,
        )

        # Invoke the model. Note that `payload.stream or True` always
        # evaluates to True, so the call is effectively always streaming.
        response = model_instance.invoke_llm(
            prompt_messages=payload.prompt_messages,
            model_parameters=payload.model_parameters,
            tools=payload.tools,
            stop=payload.stop,
            stream=payload.stream or True,
            user=user_id,
        )

        if isinstance(response, Generator):

            def handle() -> Generator[LLMResultChunk, None, None]:
                # Streaming path: deduct quota incrementally as usage
                # arrives on each chunk, then pass the chunk through.
                for chunk in response:
                    if chunk.delta.usage:
                        LLMNode.deduct_llm_quota(
                            tenant_id=tenant.id, model_instance=model_instance, usage=chunk.delta.usage
                        )
                    yield chunk

            return handle()
        else:
            # Blocking path: deduct quota once from the aggregate usage.
            if response.usage:
                LLMNode.deduct_llm_quota(tenant_id=tenant.id, model_instance=model_instance, usage=response.usage)
            return response
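

# Illustrative usage: a hypothetical sketch of consuming the union return
# type. It assumes a loaded `Tenant` row and an already-validated
# `RequestInvokeLLM` payload; the `_example_consume` helper and the
# chunk.delta.message.content access pattern are assumptions based on the
# llm_entities imported above, so verify them against your Dify version.
def _example_consume(user_id: str, tenant: Tenant, payload: RequestInvokeLLM) -> str:
    response = PluginBackwardsInvocation.invoke_llm(user_id, tenant, payload)
    if isinstance(response, Generator):
        # Streaming path: quota is already deducted per chunk inside
        # handle(); here we only stitch the streamed delta contents together.
        parts: list[str] = []
        for chunk in response:
            content = chunk.delta.message.content
            if isinstance(content, str):
                parts.append(content)
        return "".join(parts)
    # Blocking path: a single LLMResult carrying the full message.
    return str(response.message.content)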