
fix: tongyi stream generate not incremental and add qwen max models (#2013)

takatost, 1 year ago · parent commit 34bf2877c8

+ 5 - 6
api/core/model_runtime/model_providers/__base/ai_model.py

@@ -1,6 +1,4 @@
 import decimal
-import json
-import logging
 import os
 from abc import ABC, abstractmethod
 from typing import Optional
@@ -12,7 +10,6 @@ from core.model_runtime.entities.model_entities import (AIModelEntity, DefaultPa
                                                         PriceConfig, PriceInfo, PriceType)
 from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
 from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
-from pydantic import ValidationError
 
 
 class AIModel(ABC):
@@ -54,14 +51,16 @@ class AIModel(ABC):
         :param error: model invoke error
         :return: unified error
         """
+        provider_name = self.__class__.__module__.split('.')[-3]
+
         for invoke_error, model_errors in self._invoke_error_mapping.items():
             if isinstance(error, tuple(model_errors)):
                 if invoke_error == InvokeAuthorizationError:
-                    return invoke_error(description="Incorrect model credentials provided, please check and try again. ")
+                    return invoke_error(description=f"[{provider_name}] Incorrect model credentials provided, please check and try again. ")
 
-                return invoke_error(description=f"{invoke_error.description}: {str(error)}")
+                return invoke_error(description=f"[{provider_name}] {invoke_error.description}, {str(error)}")
 
-        return InvokeError(description=f"Error: {str(error)}")
+        return InvokeError(description=f"[{provider_name}] Error: {str(error)}")
 
     def get_price(self, model: str, credentials: dict, price_type: PriceType, tokens: int) -> PriceInfo:
         """

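The provider tag added to every unified error above is derived from the module path of the concrete model class. A minimal sketch of that derivation (the module path is assumed to follow Dify's provider layout):

    # For a class defined in core.model_runtime.model_providers.tongyi.llm.llm,
    # the third dotted segment from the end is the provider directory name.
    module_path = 'core.model_runtime.model_providers.tongyi.llm.llm'
    provider_name = module_path.split('.')[-3]
    assert provider_name == 'tongyi'

    # Unified errors then read e.g.:
    # [tongyi] Incorrect model credentials provided, please check and try again.
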
+ 46 - 16
api/core/model_runtime/model_providers/tongyi/llm/llm.py

@@ -1,8 +1,8 @@
-from http import HTTPStatus
 from typing import Generator, List, Optional, Union
 
-import dashscope
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from dashscope import get_tokenizer
+
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMMode
 from core.model_runtime.entities.message_entities import (AssistantPromptMessage, PromptMessage, PromptMessageTool,
                                                           SystemPromptMessage, UserPromptMessage)
 from core.model_runtime.errors.invoke import (InvokeAuthorizationError, InvokeBadRequestError, InvokeConnectionError,
@@ -51,19 +51,12 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         :param tools: tools for tool calling
         :return:
         """
-        # transform credentials to kwargs for model instance
-        credentials_kwargs = self._to_credential_kwargs(credentials)
+        tokenizer = get_tokenizer(model)
 
-        response = dashscope.Tokenization.call(
-            model=model,
-            prompt=self._convert_messages_to_prompt(prompt_messages),
-            **credentials_kwargs
-        )
-        
-        if response.status_code == HTTPStatus.OK:
-            return response['usage']['input_tokens']
-        else:
-            raise self._invoke_error_mapping[InvokeBadRequestError][0](response['message'])
+        # convert string to token ids
+        tokens = tokenizer.encode(self._convert_messages_to_prompt(prompt_messages))
+
+        return len(tokens)
 
     def validate_credentials(self, model: str, credentials: dict) -> None:
         """
@@ -119,14 +112,22 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
 
         params = {
             'model': model,
-            'prompt': self._convert_messages_to_prompt(prompt_messages),
             **model_parameters,
             **credentials_kwargs
         }
+
+        mode = self.get_model_mode(model, credentials)
+
+        if mode == LLMMode.CHAT:
+            params['messages'] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
+        else:
+            params['prompt'] = self._convert_messages_to_prompt(prompt_messages)
+
         if stream:
             responses = stream_generate_with_retry(
                 client, 
                 stream=True,
+                incremental_output=True,
                 **params
             )
 
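Passing incremental_output=True is the actual streaming fix: without it, DashScope returns the full text generated so far in every streamed chunk, so consumers of the stream saw repeated prefixes instead of deltas. A hedged sketch of the flag's effect using the dashscope SDK directly (the stream_generate_with_retry helper above wraps this same underlying call; the sketch assumes DASHSCOPE_API_KEY is set in the environment):

    from dashscope import Generation

    responses = Generation.call(
        model='qwen-turbo',
        messages=[{'role': 'user', 'content': 'Hello!'}],
        result_format='message',
        stream=True,
        incremental_output=True,  # each chunk carries only the newly generated delta
    )
    for response in responses:
        print(response.output.choices[0].message.content, end='', flush=True)
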
@@ -267,6 +268,35 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         # trim off the trailing ' ' that might come from the "Assistant: "
         return text.rstrip()
 
+    def _convert_prompt_messages_to_tongyi_messages(self, prompt_messages: list[PromptMessage]) -> list[dict]:
+        """
+        Convert prompt messages to tongyi messages
+
+        :param prompt_messages: prompt messages
+        :return: tongyi messages
+        """
+        tongyi_messages = []
+        for prompt_message in prompt_messages:
+            if isinstance(prompt_message, SystemPromptMessage):
+                tongyi_messages.append({
+                    'role': 'system',
+                    'content': prompt_message.content,
+                })
+            elif isinstance(prompt_message, UserPromptMessage):
+                tongyi_messages.append({
+                    'role': 'user',
+                    'content': prompt_message.content,
+                })
+            elif isinstance(prompt_message, AssistantPromptMessage):
+                tongyi_messages.append({
+                    'role': 'assistant',
+                    'content': prompt_message.content,
+                })
+            else:
+                raise ValueError(f"Got unknown type {prompt_message}")
+
+        return tongyi_messages
+
     @property
     def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
         """

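The token-counting change in get_num_tokens swaps the remote dashscope.Tokenization API call for a local tokenizer, so counting tokens no longer needs credentials or a network round trip. A short sketch of the API as used above (get_tokenizer requires the dashscope[tokenizer] extra pinned in requirements.txt below):

    from dashscope import get_tokenizer

    # Tokenizers are resolved per model name; encode() returns token ids.
    tokenizer = get_tokenizer('qwen-turbo')
    tokens = tokenizer.encode('Hello, Tongyi!')
    print(len(tokens))
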
File diff suppressed because it is too large
+ 57 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml


File diff suppressed because it is too large
+ 57 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml


File diff suppressed because it is too large
+ 57 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml


+ 8 - 4
api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml

@@ -24,7 +24,7 @@ parameter_rules:
     use_template: max_tokens
     default: 2000
     min: 1
-    max: 2000
+    max: 30000
     help:
       zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
       en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
@@ -42,10 +42,9 @@ parameter_rules:
       zh_Hans: 随机种子
       en_US: Random seed
     type: int
-    default: 1234
     help:
-      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。默认值 1234。
-      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types. Default value 1234.
+      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
+      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
     required: false
   - name: repetition_penalty
     label:
@@ -55,3 +54,8 @@ parameter_rules:
     help:
       zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
       en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
+pricing:
+  input: '0.02'
+  output: '0.02'
+  unit: '0.001'
+  currency: RMB

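The new pricing block is what get_price in ai_model.py consumes. Assuming Dify's usual semantics, unit is the per-token multiplier, so input '0.02' with unit '0.001' means 0.02 RMB per 1,000 tokens; a worked example under that assumption:

    from decimal import Decimal

    # Hypothetical cost for 10,000 qwen-plus input tokens:
    # tokens * unit * price = 10_000 * 0.001 * 0.02 = 0.2 RMB
    cost = Decimal(10_000) * Decimal('0.001') * Decimal('0.02')
    assert cost == Decimal('0.2')

qwen-turbo below follows the same pattern at '0.008' RMB per 1,000 tokens.
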
+ 8 - 4
api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml

@@ -24,7 +24,7 @@ parameter_rules:
     use_template: max_tokens
     default: 1500
     min: 1
-    max: 1500
+    max: 6000
     help:
       zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
       en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
@@ -42,10 +42,9 @@ parameter_rules:
       zh_Hans: 随机种子
       en_US: Random seed
     type: int
-    default: 1234
     help:
-      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。默认值 1234。
-      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types. Default value 1234.
+      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
+      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
     required: false
   - name: repetition_penalty
     label:
@@ -56,3 +55,8 @@ parameter_rules:
       zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
       en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
     required: false
+pricing:
+  input: '0.008'
+  output: '0.008'
+  unit: '0.001'
+  currency: RMB

+ 1 - 1
api/requirements.txt

@@ -44,7 +44,7 @@ readabilipy==0.2.0
 google-search-results==2.4.2
 replicate~=0.22.0
 websocket-client~=1.7.0
-dashscope~=1.13.5
+dashscope[tokenizer]~=1.14.0
 huggingface_hub~=0.16.4
 transformers~=4.31.0
 pandas==1.5.3