преди 1 година · 215661ef91
--- a/api/core/model_runtime/model_providers/_position.yaml
+++ b/api/core/model_runtime/model_providers/_position.yaml
@@ -33,3 +33,4 @@
 
				 - deepseek
			
 
				 - hunyuan
			
 
				 - siliconflow
			
 
				+- perfxcloud
			
--- a/api/core/model_runtime/model_providers/perfxcloud/__init__.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/__init__.py
--- a/api/core/model_runtime/model_providers/perfxcloud/_assets/icon_l_en.svg
+++ b/api/core/model_runtime/model_providers/perfxcloud/_assets/icon_l_en.svg
--- a/api/core/model_runtime/model_providers/perfxcloud/_assets/icon_s_en.svg
+++ b/api/core/model_runtime/model_providers/perfxcloud/_assets/icon_s_en.svg
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen-14B-Chat-Int4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen-14B-Chat-Int4.yaml
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-110B-Chat-GPTQ-Int4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-110B-Chat-GPTQ-Int4.yaml
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/__init__.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/__init__.py
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml
@@ -0,0 +1,6 @@
 
				+- Qwen2-72B-Instruct-GPTQ-Int4
			
 
				+- Qwen2-7B
			
 
				+- Qwen1.5-110B-Chat-GPTQ-Int4
			
 
				+- Qwen1.5-72B-Chat-GPTQ-Int4
			
 
				+- Qwen1.5-7B
			
 
				+- Qwen-14B-Chat-Int4
			
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/llm.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/llm.py
@@ -0,0 +1,110 @@
 
				+from collections.abc import Generator
			
 
				+from typing import Optional, Union
			
 
				+from urllib.parse import urlparse
			
 
				+
			
 
				+import tiktoken
			
 
				+
			
 
				+from core.model_runtime.entities.llm_entities import LLMResult
			
 
				+from core.model_runtime.entities.message_entities import (
			
 
				+    PromptMessage,
			
 
				+    PromptMessageTool,
			
 
				+)
			
 
				+from core.model_runtime.model_providers.openai.llm.llm import OpenAILargeLanguageModel
			
 
				+
			
 
				+
			
 
				+class PerfXCloudLargeLanguageModel(OpenAILargeLanguageModel):
			
 
				+    def _invoke(self, model: str, credentials: dict,
			
 
				+                prompt_messages: list[PromptMessage], model_parameters: dict,
			
 
				+                tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
			
 
				+                stream: bool = True, user: Optional[str] = None) \
			
 
				+            -> Union[LLMResult, Generator]:
			
 
				+        self._add_custom_parameters(credentials)
			
 
				+
			
 
				+        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream)
			
 
				+
			
 
				+    def validate_credentials(self, model: str, credentials: dict) -> None:
			
 
				+        self._add_custom_parameters(credentials)
			
 
				+        super().validate_credentials(model, credentials)
			
 
				+
			
 
				+    # refactored from openai model runtime, use cl100k_base for calculate token number
			
 
				+    def _num_tokens_from_string(self, model: str, text: str,
			
 
				+                                tools: Optional[list[PromptMessageTool]] = None) -> int:
			
 
				+        """
			
 
				+        Calculate num tokens for text completion model with tiktoken package.
			
 
				+
			
 
				+        :param model: model name
			
 
				+        :param text: prompt text
			
 
				+        :param tools: tools for tool calling
			
 
				+        :return: number of tokens
			
 
				+        """
			
 
				+        encoding = tiktoken.get_encoding("cl100k_base")
			
 
				+        num_tokens = len(encoding.encode(text))
			
 
				+
			
 
				+        if tools:
			
 
				+            num_tokens += self._num_tokens_for_tools(encoding, tools)
			
 
				+
			
 
				+        return num_tokens
			
 
				+
			
 
				+    # refactored from openai model runtime, use cl100k_base for calculate token number
			
 
				+    def _num_tokens_from_messages(self, model: str, messages: list[PromptMessage],
			
 
				+                                  tools: Optional[list[PromptMessageTool]] = None) -> int:
			
 
				+        """Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package.
			
 
				+
			
 
				+        Official documentation: https://github.com/openai/openai-cookbook/blob/
			
 
				+        main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
			
 
				+        encoding = tiktoken.get_encoding("cl100k_base")
			
 
				+        tokens_per_message = 3
			
 
				+        tokens_per_name = 1
			
 
				+
			
 
				+        num_tokens = 0
			
 
				+        messages_dict = [self._convert_prompt_message_to_dict(m) for m in messages]
			
 
				+        for message in messages_dict:
			
 
				+            num_tokens += tokens_per_message
			
 
				+            for key, value in message.items():
			
 
				+                # Cast str(value) in case the message value is not a string
			
 
				+                # This occurs with function messages
			
 
				+                # TODO: The current token calculation method for the image type is not implemented,
			
 
				+                #  which need to download the image and then get the resolution for calculation,
			
 
				+                #  and will increase the request delay
			
 
				+                if isinstance(value, list):
			
 
				+                    text = ''
			
 
				+                    for item in value:
			
 
				+                        if isinstance(item, dict) and item['type'] == 'text':
			
 
				+                            text += item['text']
			
 
				+
			
 
				+                    value = text
			
 
				+
			
 
				+                if key == "tool_calls":
			
 
				+                    for tool_call in value:
			
 
				+                        for t_key, t_value in tool_call.items():
			
 
				+                            num_tokens += len(encoding.encode(t_key))
			
 
				+                            if t_key == "function":
			
 
				+                                for f_key, f_value in t_value.items():
			
 
				+                                    num_tokens += len(encoding.encode(f_key))
			
 
				+                                    num_tokens += len(encoding.encode(f_value))
			
 
				+                            else:
			
 
				+                                num_tokens += len(encoding.encode(t_key))
			
 
				+                                num_tokens += len(encoding.encode(t_value))
			
 
				+                else:
			
 
				+                    num_tokens += len(encoding.encode(str(value)))
			
 
				+
			
 
				+                if key == "name":
			
 
				+                    num_tokens += tokens_per_name
			
 
				+
			
 
				+        # every reply is primed with <im_start>assistant
			
 
				+        num_tokens += 3
			
 
				+
			
 
				+        if tools:
			
 
				+            num_tokens += self._num_tokens_for_tools(encoding, tools)
			
 
				+
			
 
				+        return num_tokens
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _add_custom_parameters(credentials: dict) -> None:
			
 
				+        credentials['mode'] = 'chat'
			
 
				+        credentials['openai_api_key']=credentials['api_key']
			
 
				+        if 'endpoint_url' not in credentials or credentials['endpoint_url'] == "":
			
 
				+            credentials['openai_api_base']='https://cloud.perfxlab.cn'
			
 
				+        else:
			
 
				+            parsed_url = urlparse(credentials['endpoint_url'])
			
 
				+            credentials['openai_api_base']=f"{parsed_url.scheme}://{parsed_url.netloc}"
			
--- a/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py
@@ -0,0 +1,32 @@
 
				+import logging
			
 
				+
			
 
				+from core.model_runtime.entities.model_entities import ModelType
			
 
				+from core.model_runtime.errors.validate import CredentialsValidateFailedError
			
 
				+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+class PerfXCloudProvider(ModelProvider):
			
 
				+
			
 
				+    def validate_provider_credentials(self, credentials: dict) -> None:
			
 
				+        """
			
 
				+        Validate provider credentials
			
 
				+        if validate failed, raise exception
			
 
				+
			
 
				+        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
			
 
				+        """
			
 
				+        try:
			
 
				+            model_instance = self.get_model_instance(ModelType.LLM)
			
 
				+
			
 
				+            # Use `Qwen2_72B_Chat_GPTQ_Int4` model for validate,
			
 
				+            # no matter what model you pass in, text completion model or chat model
			
 
				+            model_instance.validate_credentials(
			
 
				+                model='Qwen2-72B-Instruct-GPTQ-Int4',
			
 
				+                credentials=credentials
			
 
				+            )
			
 
				+        except CredentialsValidateFailedError as ex:
			
 
				+            raise ex
			
 
				+        except Exception as ex:
			
 
				+            logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
			
 
				+            raise ex
			
--- a/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.yaml
@@ -0,0 +1,42 @@
 
				+provider: perfxcloud
			
 
				+label:
			
 
				+  en_US: PerfXCloud
			
 
				+  zh_Hans: PerfXCloud
			
 
				+description:
			
 
				+  en_US: PerfXCloud (Pengfeng Technology) is an AI development and deployment platform tailored for developers and enterprises, providing inference capabilities for multiple models.
			
 
				+  zh_Hans: PerfXCloud（澎峰科技）为开发者和企业量身打造的AI开发和部署平台，提供多种模型的推理能力。
			
 
				+icon_small:
			
 
				+  en_US: icon_s_en.svg
			
 
				+icon_large:
			
 
				+  en_US: icon_l_en.svg
			
 
				+background: "#e3f0ff"
			
 
				+help:
			
 
				+  title:
			
 
				+    en_US: Get your API Key from PerfXCloud
			
 
				+    zh_Hans: 从 PerfXCloud 获取 API Key
			
 
				+  url:
			
 
				+    en_US: https://cloud.perfxlab.cn/panel/token
			
 
				+supported_model_types:
			
 
				+  - llm
			
 
				+  - text-embedding
			
 
				+configurate_methods:
			
 
				+  - predefined-model
			
 
				+provider_credential_schema:
			
 
				+  credential_form_schemas:
			
 
				+    - variable: api_key
			
 
				+      label:
			
 
				+        en_US: API Key
			
 
				+      type: secret-input
			
 
				+      required: true
			
 
				+      placeholder:
			
 
				+        zh_Hans: 在此输入您的 API Key
			
 
				+        en_US: Enter your API Key
			
 
				+    - variable: endpoint_url
			
 
				+      label:
			
 
				+        zh_Hans: 自定义 API endpoint 地址
			
 
				+        en_US: Custom API endpoint URL
			
 
				+      type: text-input
			
 
				+      required: false
			
 
				+      placeholder:
			
 
				+        zh_Hans: Base URL, e.g. https://cloud.perfxlab.cn/v1
			
 
				+        en_US: Base URL, e.g. https://cloud.perfxlab.cn/v1
			
--- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/BAAI-bge-m3.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/BAAI-bge-m3.yaml
@@ -0,0 +1,4 @@
 
				+model: BAAI/bge-m3
			
 
				+model_type: text-embedding
			
 
				+model_properties:
			
 
				+  context_size: 32768
			
--- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/__init__.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/__init__.py
--- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
@@ -0,0 +1,250 @@
 
				+import json
			
 
				+import time
			
 
				+from decimal import Decimal
			
 
				+from typing import Optional
			
 
				+from urllib.parse import urljoin
			
 
				+
			
 
				+import numpy as np
			
 
				+import requests
			
 
				+
			
 
				+from core.model_runtime.entities.common_entities import I18nObject
			
 
				+from core.model_runtime.entities.model_entities import (
			
 
				+    AIModelEntity,
			
 
				+    FetchFrom,
			
 
				+    ModelPropertyKey,
			
 
				+    ModelType,
			
 
				+    PriceConfig,
			
 
				+    PriceType,
			
 
				+)
			
 
				+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
			
 
				+from core.model_runtime.errors.validate import CredentialsValidateFailedError
			
 
				+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
			
 
				+from core.model_runtime.model_providers.openai_api_compatible._common import _CommonOAI_API_Compat
			
 
				+
			
 
				+
			
 
				+class OAICompatEmbeddingModel(_CommonOAI_API_Compat, TextEmbeddingModel):
			
 
				+    """
			
 
				+    Model class for an OpenAI API-compatible text embedding model.
			
 
				+    """
			
 
				+
			
 
				+    def _invoke(self, model: str, credentials: dict,
			
 
				+                texts: list[str], user: Optional[str] = None) \
			
 
				+            -> TextEmbeddingResult:
			
 
				+        """
			
 
				+        Invoke text embedding model
			
 
				+
			
 
				+        :param model: model name
			
 
				+        :param credentials: model credentials
			
 
				+        :param texts: texts to embed
			
 
				+        :param user: unique user id
			
 
				+        :return: embeddings result
			
 
				+        """
			
 
				+       
			
 
				+        # Prepare headers and payload for the request
			
 
				+        headers = {
			
 
				+            'Content-Type': 'application/json'
			
 
				+        }
			
 
				+
			
 
				+        api_key = credentials.get('api_key')
			
 
				+        if api_key:
			
 
				+            headers["Authorization"] = f"Bearer {api_key}"
			
 
				+
			
 
				+        if 'endpoint_url' not in credentials or credentials['endpoint_url'] == "":
			
 
				+            endpoint_url='https://cloud.perfxlab.cn/v1/'
			
 
				+        else:
			
 
				+            endpoint_url = credentials.get('endpoint_url')
			
 
				+            if not endpoint_url.endswith('/'):
			
 
				+                endpoint_url += '/'
			
 
				+
			
 
				+        endpoint_url = urljoin(endpoint_url, 'embeddings')
			
 
				+
			
 
				+        extra_model_kwargs = {}
			
 
				+        if user:
			
 
				+            extra_model_kwargs['user'] = user
			
 
				+
			
 
				+        extra_model_kwargs['encoding_format'] = 'float'
			
 
				+
			
 
				+        # get model properties
			
 
				+        context_size = self._get_context_size(model, credentials)
			
 
				+        max_chunks = self._get_max_chunks(model, credentials)
			
 
				+
			
 
				+        inputs = []
			
 
				+        indices = []
			
 
				+        used_tokens = 0
			
 
				+
			
 
				+        for i, text in enumerate(texts):
			
 
				+
			
 
				+            # Here token count is only an approximation based on the GPT2 tokenizer
			
 
				+            # TODO: Optimize for better token estimation and chunking
			
 
				+            num_tokens = self._get_num_tokens_by_gpt2(text)
			
 
				+
			
 
				+            if num_tokens >= context_size:
			
 
				+                cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
			
 
				+                # if num tokens is larger than context length, only use the start
			
 
				+                inputs.append(text[0: cutoff])
			
 
				+            else:
			
 
				+                inputs.append(text)
			
 
				+            indices += [i]
			
 
				+
			
 
				+        batched_embeddings = []
			
 
				+        _iter = range(0, len(inputs), max_chunks)
			
 
				+
			
 
				+        for i in _iter:
			
 
				+            # Prepare the payload for the request
			
 
				+            payload = {
			
 
				+                'input': inputs[i: i + max_chunks],
			
 
				+                'model': model,
			
 
				+                **extra_model_kwargs
			
 
				+            }
			
 
				+
			
 
				+            # Make the request to the OpenAI API
			
 
				+            response = requests.post(
			
 
				+                endpoint_url,
			
 
				+                headers=headers,
			
 
				+                data=json.dumps(payload),
			
 
				+                timeout=(10, 300)
			
 
				+            )
			
 
				+
			
 
				+            response.raise_for_status()  # Raise an exception for HTTP errors
			
 
				+            response_data = response.json()
			
 
				+
			
 
				+            # Extract embeddings and used tokens from the response
			
 
				+            embeddings_batch = [data['embedding'] for data in response_data['data']]
			
 
				+            embedding_used_tokens = response_data['usage']['total_tokens']
			
 
				+
			
 
				+            used_tokens += embedding_used_tokens
			
 
				+            batched_embeddings += embeddings_batch
			
 
				+
			
 
				+        # calc usage
			
 
				+        usage = self._calc_response_usage(
			
 
				+            model=model,
			
 
				+            credentials=credentials,
			
 
				+            tokens=used_tokens
			
 
				+        )
			
 
				+        
			
 
				+        return TextEmbeddingResult(
			
 
				+            embeddings=batched_embeddings,
			
 
				+            usage=usage,
			
 
				+            model=model
			
 
				+        )
			
 
				+
			
 
				+    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
			
 
				+        """
			
 
				+        Approximate number of tokens for given messages using GPT2 tokenizer
			
 
				+
			
 
				+        :param model: model name
			
 
				+        :param credentials: model credentials
			
 
				+        :param texts: texts to embed
			
 
				+        :return:
			
 
				+        """
			
 
				+        return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
			
 
				+
			
 
				+    def validate_credentials(self, model: str, credentials: dict) -> None:
			
 
				+        """
			
 
				+        Validate model credentials
			
 
				+
			
 
				+        :param model: model name
			
 
				+        :param credentials: model credentials
			
 
				+        :return:
			
 
				+        """
			
 
				+        try:
			
 
				+            headers = {
			
 
				+                'Content-Type': 'application/json'
			
 
				+            }
			
 
				+
			
 
				+            api_key = credentials.get('api_key')
			
 
				+
			
 
				+            if api_key:
			
 
				+                headers["Authorization"] = f"Bearer {api_key}"
			
 
				+
			
 
				+            if 'endpoint_url' not in credentials or credentials['endpoint_url'] == "":
			
 
				+                endpoint_url='https://cloud.perfxlab.cn/v1/'
			
 
				+            else:
			
 
				+                endpoint_url = credentials.get('endpoint_url')
			
 
				+                if not endpoint_url.endswith('/'):
			
 
				+                    endpoint_url += '/'
			
 
				+
			
 
				+            endpoint_url = urljoin(endpoint_url, 'embeddings')
			
 
				+
			
 
				+            payload = {
			
 
				+                'input': 'ping',
			
 
				+                'model': model
			
 
				+            }
			
 
				+
			
 
				+            response = requests.post(
			
 
				+                url=endpoint_url,
			
 
				+                headers=headers,
			
 
				+                data=json.dumps(payload),
			
 
				+                timeout=(10, 300)
			
 
				+            )
			
 
				+
			
 
				+            if response.status_code != 200:
			
 
				+                raise CredentialsValidateFailedError(
			
 
				+                    f'Credentials validation failed with status code {response.status_code}')
			
 
				+
			
 
				+            try:
			
 
				+                json_result = response.json()
			
 
				+            except json.JSONDecodeError as e:
			
 
				+                raise CredentialsValidateFailedError('Credentials validation failed: JSON decode error')
			
 
				+
			
 
				+            if 'model' not in json_result:
			
 
				+                raise CredentialsValidateFailedError(
			
 
				+                    'Credentials validation failed: invalid response')
			
 
				+        except CredentialsValidateFailedError:
			
 
				+            raise
			
 
				+        except Exception as ex:
			
 
				+            raise CredentialsValidateFailedError(str(ex))
			
 
				+
			
 
				+    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
			
 
				+        """
			
 
				+            generate custom model entities from credentials
			
 
				+        """
			
 
				+        entity = AIModelEntity(
			
 
				+            model=model,
			
 
				+            label=I18nObject(en_US=model),
			
 
				+            model_type=ModelType.TEXT_EMBEDDING,
			
 
				+            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
			
 
				+            model_properties={
			
 
				+                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get('context_size')),
			
 
				+                ModelPropertyKey.MAX_CHUNKS: 1,
			
 
				+            },
			
 
				+            parameter_rules=[],
			
 
				+            pricing=PriceConfig(
			
 
				+                input=Decimal(credentials.get('input_price', 0)),
			
 
				+                unit=Decimal(credentials.get('unit', 0)),
			
 
				+                currency=credentials.get('currency', "USD")
			
 
				+            )
			
 
				+        )
			
 
				+
			
 
				+        return entity
			
 
				+
			
 
				+
			
 
				+    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
			
 
				+        """
			
 
				+        Calculate response usage
			
 
				+
			
 
				+        :param model: model name
			
 
				+        :param credentials: model credentials
			
 
				+        :param tokens: input tokens
			
 
				+        :return: usage
			
 
				+        """
			
 
				+        # get input price info
			
 
				+        input_price_info = self.get_price(
			
 
				+            model=model,
			
 
				+            credentials=credentials,
			
 
				+            price_type=PriceType.INPUT,
			
 
				+            tokens=tokens
			
 
				+        )
			
 
				+
			
 
				+        # transform usage
			
 
				+        usage = EmbeddingUsage(
			
 
				+            tokens=tokens,
			
 
				+            total_tokens=tokens,
			
 
				+            unit_price=input_price_info.unit_price,
			
 
				+            price_unit=input_price_info.unit,
			
 
				+            total_price=input_price_info.total_amount,
			
 
				+            currency=input_price_info.currency,
			
 
				+            latency=time.perf_counter() - self.started_at
			
 
				+        )
			
 
				+
			
 
				+        return usage