openllm.py

from __future__ import annotations

import logging
from typing import Any, Dict, List, Optional

import requests
from pydantic import Field

from langchain.callbacks.manager import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens

logger = logging.getLogger(__name__)


class OpenLLM(LLM):
    """Wrapper around a remote OpenLLM server.

    If you have an OpenLLM server running (started with ``openllm start``),
    you can use it remotely:

    .. code-block:: python

        from langchain.llms import OpenLLM

        llm = OpenLLM(server_url="http://localhost:3000")
        llm("What is the difference between a duck and a goose?")
    """

    server_url: Optional[str] = None
    """Optional URL of a server currently running 'openllm start'."""
    llm_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to be passed to openllm.LLM."""

    @property
    def _llm_type(self) -> str:
        return "openllm"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # Forward the prompt and any model configuration to the server's
        # /v1/generate endpoint.
        params = {"prompt": prompt, "llm_config": self.llm_kwargs}
        headers = {"Content-Type": "application/json"}
        response = requests.post(
            f"{self.server_url}/v1/generate",
            headers=headers,
            json=params,
        )
        if not response.ok:
            raise ValueError(
                f"OpenLLM HTTP {response.status_code} error: {response.text}"
            )
        json_response = response.json()
        completion = json_response["responses"][0]
        if completion:
            # The server echoes the prompt; strip it from the completion.
            completion = completion[len(prompt):]
        if stop is not None:
            completion = enforce_stop_tokens(completion, stop)
        return completion

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        raise NotImplementedError(
            "Async call is not supported for OpenLLM at the moment."
        )