
fix: tongyi stream generate not incremental and add qwen max models (#2013)

takatost, 1 year ago · parent commit 34bf2877c8

+ 5 - 6
api/core/model_runtime/model_providers/__base/ai_model.py

@@ -1,6 +1,4 @@
 import decimal
-import json
-import logging
 import os
 from abc import ABC, abstractmethod
 from typing import Optional
@@ -12,7 +10,6 @@ from core.model_runtime.entities.model_entities import (AIModelEntity, DefaultPa
                                                         PriceConfig, PriceInfo, PriceType)
 from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
 from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
-from pydantic import ValidationError
 
 
 class AIModel(ABC):
@@ -54,14 +51,16 @@ class AIModel(ABC):
         :param error: model invoke error
         :return: unified error
         """
+        provider_name = self.__class__.__module__.split('.')[-3]
+
         for invoke_error, model_errors in self._invoke_error_mapping.items():
             if isinstance(error, tuple(model_errors)):
                 if invoke_error == InvokeAuthorizationError:
-                    return invoke_error(description="Incorrect model credentials provided, please check and try again. ")
+                    return invoke_error(description=f"[{provider_name}] Incorrect model credentials provided, please check and try again. ")
 
-                return invoke_error(description=f"{invoke_error.description}: {str(error)}")
+                return invoke_error(description=f"[{provider_name}] {invoke_error.description}, {str(error)}")
 
-        return InvokeError(description=f"Error: {str(error)}")
+        return InvokeError(description=f"[{provider_name}] Error: {str(error)}")
 
     def get_price(self, model: str, credentials: dict, price_type: PriceType, tokens: int) -> PriceInfo:
         """

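The provider tag added to every unified error above is derived from the module path of the concrete model class. A minimal sketch of that derivation (the module path is assumed to follow Dify's provider layout):

    # For a class defined in core.model_runtime.model_providers.tongyi.llm.llm,
    # the third dotted segment from the end is the provider directory name.
    module_path = 'core.model_runtime.model_providers.tongyi.llm.llm'
    provider_name = module_path.split('.')[-3]
    assert provider_name == 'tongyi'

    # Unified errors then read e.g.:
    # [tongyi] Incorrect model credentials provided, please check and try again.
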
+ 46 - 16
api/core/model_runtime/model_providers/tongyi/llm/llm.py

@@ -1,8 +1,8 @@
-from http import HTTPStatus
 from typing import Generator, List, Optional, Union
 
-import dashscope
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from dashscope import get_tokenizer
+
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMMode
 from core.model_runtime.entities.message_entities import (AssistantPromptMessage, PromptMessage, PromptMessageTool,
                                                           SystemPromptMessage, UserPromptMessage)
 from core.model_runtime.errors.invoke import (InvokeAuthorizationError, InvokeBadRequestError, InvokeConnectionError,
@@ -51,19 +51,12 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         :param tools: tools for tool calling
         :return:
         """
-        # transform credentials to kwargs for model instance
-        credentials_kwargs = self._to_credential_kwargs(credentials)
+        tokenizer = get_tokenizer(model)
 
-        response = dashscope.Tokenization.call(
-            model=model,
-            prompt=self._convert_messages_to_prompt(prompt_messages),
-            **credentials_kwargs
-        )
-        
-        if response.status_code == HTTPStatus.OK:
-            return response['usage']['input_tokens']
-        else:
-            raise self._invoke_error_mapping[InvokeBadRequestError][0](response['message'])
+        # convert string to token ids
+        tokens = tokenizer.encode(self._convert_messages_to_prompt(prompt_messages))
+
+        return len(tokens)
 
     def validate_credentials(self, model: str, credentials: dict) -> None:
         """
@@ -119,14 +112,22 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
 
         params = {
             'model': model,
-            'prompt': self._convert_messages_to_prompt(prompt_messages),
             **model_parameters,
             **credentials_kwargs
         }
+
+        mode = self.get_model_mode(model, credentials)
+
+        if mode == LLMMode.CHAT:
+            params['messages'] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
+        else:
+            params['prompt'] = self._convert_messages_to_prompt(prompt_messages)
+
         if stream:
             responses = stream_generate_with_retry(
                 client, 
                 stream=True,
+                incremental_output=True,
                 **params
             )
 
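Passing incremental_output=True is the actual streaming fix: without it, DashScope returns the full text generated so far in every streamed chunk, so consumers of the stream saw repeated prefixes instead of deltas. A hedged sketch of the flag's effect using the dashscope SDK directly (the stream_generate_with_retry helper above wraps this same underlying call; the sketch assumes DASHSCOPE_API_KEY is set in the environment):

    from dashscope import Generation

    responses = Generation.call(
        model='qwen-turbo',
        messages=[{'role': 'user', 'content': 'Hello!'}],
        result_format='message',
        stream=True,
        incremental_output=True,  # each chunk carries only the newly generated delta
    )
    for response in responses:
        print(response.output.choices[0].message.content, end='', flush=True)
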
@@ -267,6 +268,35 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         # trim off the trailing ' ' that might come from the "Assistant: "
         return text.rstrip()
 
+    def _convert_prompt_messages_to_tongyi_messages(self, prompt_messages: list[PromptMessage]) -> list[dict]:
+        """
+        Convert prompt messages to tongyi messages
+
+        :param prompt_messages: prompt messages
+        :return: tongyi messages
+        """
+        tongyi_messages = []
+        for prompt_message in prompt_messages:
+            if isinstance(prompt_message, SystemPromptMessage):
+                tongyi_messages.append({
+                    'role': 'system',
+                    'content': prompt_message.content,
+                })
+            elif isinstance(prompt_message, UserPromptMessage):
+                tongyi_messages.append({
+                    'role': 'user',
+                    'content': prompt_message.content,
+                })
+            elif isinstance(prompt_message, AssistantPromptMessage):
+                tongyi_messages.append({
+                    'role': 'assistant',
+                    'content': prompt_message.content,
+                })
+            else:
+                raise ValueError(f"Got unknown type {prompt_message}")
+
+        return tongyi_messages
+
     @property
     def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
         """

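The token-counting change in get_num_tokens swaps the remote dashscope.Tokenization API call for a local tokenizer, so counting tokens no longer needs credentials or a network round trip. A short sketch of the API as used above (get_tokenizer requires the dashscope[tokenizer] extra pinned in requirements.txt below):

    from dashscope import get_tokenizer

    # Tokenizers are resolved per model name; encode() returns token ids.
    tokenizer = get_tokenizer('qwen-turbo')
    tokens = tokenizer.encode('Hello, Tongyi!')
    print(len(tokens))
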
File diff suppressed because it is too large
+ 57 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml


File diff suppressed because it is too large
+ 57 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml


File diff suppressed because it is too large
+ 57 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml


+ 8 - 4
api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml

@@ -24,7 +24,7 @@ parameter_rules:
     use_template: max_tokens
     default: 2000
     min: 1
-    max: 2000
+    max: 30000
     help:
       zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
       en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
@@ -42,10 +42,9 @@ parameter_rules:
       zh_Hans: 随机种子
       en_US: Random seed
     type: int
-    default: 1234
     help:
-      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。默认值 1234。
-      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types. Default value 1234.
+      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
+      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
     required: false
   - name: repetition_penalty
     label:
@@ -55,3 +54,8 @@ parameter_rules:
     help:
       zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
       en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
+pricing:
+  input: '0.02'
+  output: '0.02'
+  unit: '0.001'
+  currency: RMB

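The new pricing block is what get_price in ai_model.py consumes. Assuming Dify's usual semantics, unit is the per-token multiplier, so input '0.02' with unit '0.001' means 0.02 RMB per 1,000 tokens; a worked example under that assumption:

    from decimal import Decimal

    # Hypothetical cost for 10,000 qwen-plus input tokens:
    # tokens * unit * price = 10_000 * 0.001 * 0.02 = 0.2 RMB
    cost = Decimal(10_000) * Decimal('0.001') * Decimal('0.02')
    assert cost == Decimal('0.2')

qwen-turbo below follows the same pattern at '0.008' RMB per 1,000 tokens.
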
+ 8 - 4
api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml

@@ -24,7 +24,7 @@ parameter_rules:
     use_template: max_tokens
     default: 1500
     min: 1
-    max: 1500
+    max: 6000
     help:
       zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
       en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
@@ -42,10 +42,9 @@ parameter_rules:
       zh_Hans: 随机种子
       en_US: Random seed
     type: int
-    default: 1234
     help:
-      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。默认值 1234。
-      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types. Default value 1234.
+      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
+      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
     required: false
   - name: repetition_penalty
     label:
@@ -56,3 +55,8 @@ parameter_rules:
       zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
       en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
     required: false
+pricing:
+  input: '0.008'
+  output: '0.008'
+  unit: '0.001'
+  currency: RMB

+ 1 - 1
api/requirements.txt

@@ -44,7 +44,7 @@ readabilipy==0.2.0
 google-search-results==2.4.2
 replicate~=0.22.0
 websocket-client~=1.7.0
-dashscope~=1.13.5
+dashscope[tokenizer]~=1.14.0
 huggingface_hub~=0.16.4
 transformers~=4.31.0
 pandas==1.5.3