xinference_provider.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. import json
  2. from typing import Type
  3. import requests
  4. from core.helper import encrypter
  5. from core.model_providers.models.embedding.xinference_embedding import XinferenceEmbedding
  6. from core.model_providers.models.entity.model_params import KwargRule, ModelKwargsRules, ModelType
  7. from core.model_providers.models.llm.xinference_model import XinferenceModel
  8. from core.model_providers.providers.base import BaseModelProvider, CredentialsValidateFailedError
  9. from core.model_providers.models.base import BaseProviderModel
  10. from core.third_party.langchain.llms.xinference_llm import XinferenceLLM
  11. from models.provider import ProviderType
  class XinferenceProvider(BaseModelProvider):
      """
      Model provider backed by a Xinference server.

      Credentials are kept per model (``server_url`` and ``model_uid``); the
      provider-level credential hooks at the bottom of the class are no-ops.
      """

      # Upper bound, in seconds, for the model-description HTTP request so an
      # unreachable server cannot block the caller indefinitely.
      _DESCRIBE_TIMEOUT = 10

      @property
      def provider_name(self):
          """
          Returns the name of a provider.
          """
          return 'xinference'

      def _get_fixed_model_list(self, model_type: ModelType) -> list[dict]:
          """
          Returns the fixed model list, which is always empty for this provider.

          :param model_type:
          :return:
          """
          return []

      def get_model_class(self, model_type: ModelType) -> Type[BaseProviderModel]:
          """
          Returns the model class.

          :param model_type:
          :return: XinferenceModel for text generation, XinferenceEmbedding for embeddings
          :raises NotImplementedError: for any other model type
          """
          if model_type == ModelType.TEXT_GENERATION:
              model_class = XinferenceModel
          elif model_type == ModelType.EMBEDDINGS:
              model_class = XinferenceEmbedding
          else:
              raise NotImplementedError

          return model_class

      def get_model_parameter_rules(self, model_name: str, model_type: ModelType) -> ModelKwargsRules:
          """
          get model parameter rules.

          Presence/frequency penalties are enabled only for "ggmlv3" models whose
          handle type is not "chatglm"; every other combination disables them.

          :param model_name:
          :param model_type:
          :return:
          """
          credentials = self.get_model_credentials(model_name, model_type)

          # Use .get(): get_model_credentials() returns a dict without
          # 'model_format' / 'model_handle_type' when no config is stored, and
          # direct indexing raised KeyError in that case.
          is_ggml = credentials.get('model_format') == "ggmlv3"
          is_chatglm = credentials.get('model_handle_type') == "chatglm"

          if is_ggml and not is_chatglm:
              presence_penalty = KwargRule[float](min=-2, max=2, default=0)
              frequency_penalty = KwargRule[float](min=-2, max=2, default=0)
          else:
              presence_penalty = KwargRule[float](enabled=False)
              frequency_penalty = KwargRule[float](enabled=False)

          return ModelKwargsRules(
              temperature=KwargRule[float](min=0.01, max=2, default=1),
              top_p=KwargRule[float](min=0, max=1, default=0.7),
              presence_penalty=presence_penalty,
              frequency_penalty=frequency_penalty,
              max_tokens=KwargRule[int](min=10, max=4000, default=256),
          )

      @classmethod
      def is_model_credentials_valid_or_raise(cls, model_name: str, model_type: ModelType, credentials: dict):
          """
          check model credentials valid.

          :param model_name:
          :param model_type:
          :param credentials:
          :raises CredentialsValidateFailedError: when a credential is missing or
              empty, or when the test call against the server fails
          """
          # Reject empty / None values as well as absent keys so the user gets
          # the explicit message instead of a client-side failure.
          if not credentials.get('server_url'):
              raise CredentialsValidateFailedError('Xinference Server URL must be provided.')

          if not credentials.get('model_uid'):
              raise CredentialsValidateFailedError('Xinference Model UID must be provided.')

          # NOTE(review): the check always goes through XinferenceLLM, whatever
          # model_type is - confirm this is intended for ModelType.EMBEDDINGS.
          try:
              llm = XinferenceLLM(
                  server_url=credentials['server_url'],
                  model_uid=credentials['model_uid'],
              )
              llm("ping")
          except Exception as ex:
              # Validation boundary: any failure is reported as invalid
              # credentials, with the original exception kept as the cause.
              raise CredentialsValidateFailedError(str(ex)) from ex

      @classmethod
      def encrypt_model_credentials(cls, tenant_id: str, model_name: str, model_type: ModelType,
                                    credentials: dict) -> dict:
          """
          encrypt model credentials for save.

          The returned dict holds the given credentials plus the extra fields
          from the model description, with ``server_url`` encrypted. The input
          dict is left untouched.

          :param tenant_id:
          :param model_name:
          :param model_type:
          :param credentials:
          :return:
          """
          # Work on a copy so the caller's plaintext server_url is not replaced
          # by the encrypted token behind its back.
          encrypted = dict(credentials)
          encrypted.update(cls._get_extra_credentials(credentials))
          encrypted['server_url'] = encrypter.encrypt_token(tenant_id, encrypted['server_url'])

          return encrypted

      def get_model_credentials(self, model_name: str, model_type: ModelType, obfuscated: bool = False) -> dict:
          """
          get credentials for llm use.

          :param model_name:
          :param model_type:
          :param obfuscated: when True, server_url is passed through encrypter.obfuscated_token
          :return: the stored credentials with server_url decrypted, or a dict with
              server_url and model_uid set to None when no config is stored
          :raises NotImplementedError: when the provider is not of the custom type
          """
          if self.provider.provider_type != ProviderType.CUSTOM.value:
              raise NotImplementedError

          provider_model = self._get_provider_model(model_name, model_type)

          if not provider_model.encrypted_config:
              return {
                  'server_url': None,
                  'model_uid': None,
              }

          credentials = json.loads(provider_model.encrypted_config)
          if credentials['server_url']:
              credentials['server_url'] = encrypter.decrypt_token(
                  self.provider.tenant_id,
                  credentials['server_url']
              )

              if obfuscated:
                  credentials['server_url'] = encrypter.obfuscated_token(credentials['server_url'])

          return credentials

      @classmethod
      def _get_extra_credentials(cls, credentials: dict) -> dict:
          """
          Fetch the model description from the server and derive extra credential fields.

          :param credentials: must contain 'server_url' and 'model_uid'
          :return: dict with 'model_format' and 'model_handle_type'
              ('chatglm', 'generate' or 'chat')
          :raises RuntimeError: when the server does not answer with HTTP 200
          :raises NotImplementedError: when no handle type matches the description
          """
          # Drop trailing slashes so the request path never starts with "//".
          server_url = str(credentials['server_url']).rstrip('/')
          url = f"{server_url}/v1/models/{credentials['model_uid']}"

          response = requests.get(url, timeout=cls._DESCRIBE_TIMEOUT)
          if response.status_code != 200:
              # The error body is not guaranteed to be JSON with a 'detail' key;
              # fall back to the raw text rather than masking the HTTP failure.
              try:
                  detail = response.json()['detail']
              except (ValueError, KeyError, TypeError):
                  detail = response.text
              raise RuntimeError(
                  f"Failed to get the model description, detail: {detail}"
              )

          desc = response.json()

          extra_credentials = {
              'model_format': desc['model_format'],
          }

          # Resolution order matters: ggmlv3 chatglm models get their own handle
          # type before the generic ability checks are consulted.
          if desc["model_format"] == "ggmlv3" and "chatglm" in desc["model_name"]:
              extra_credentials['model_handle_type'] = 'chatglm'
          elif "generate" in desc["model_ability"]:
              extra_credentials['model_handle_type'] = 'generate'
          elif "chat" in desc["model_ability"]:
              extra_credentials['model_handle_type'] = 'chat'
          else:
              raise NotImplementedError("Model handle type not supported.")

          return extra_credentials

      @classmethod
      def is_provider_credentials_valid_or_raise(cls, credentials: dict):
          """
          Provider-level validation hook; does nothing for this provider.

          :param credentials:
          """
          return

      @classmethod
      def encrypt_provider_credentials(cls, tenant_id: str, credentials: dict) -> dict:
          """
          Provider-level encryption hook; always returns an empty dict.

          :param tenant_id:
          :param credentials:
          :return:
          """
          return {}

      def get_provider_credentials(self, obfuscated: bool = False) -> dict:
          """
          Provider-level credentials getter; always returns an empty dict.

          :param obfuscated:
          :return:
          """
          return {}