
Fix: update qwen model and model config (#8584)

Co-authored-by: -LAN- <laipz8200@outlook.com>
Su Yang authored 8 months ago · commit c87f710d58
39 changed files with 1464 additions and 40 deletions
  1. +51 -0  api/core/model_runtime/model_providers/tongyi/llm/_position.yaml
  2. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
  3. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
  4. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
  5. +1 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
  6. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
  7. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
  8. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
  9. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
  10. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
  11. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
  12. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
  13. +1 -1  api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
  14. +1 -1  api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
  15. +1 -1  api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
  16. +81 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
  17. +81 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
  18. +3 -3  api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
  19. +3 -3  api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
  20. +3 -3  api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
  21. +3 -3  api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
  22. +5 -5  api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
  23. +2 -2  api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
  24. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
  25. +1 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
  26. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
  27. +6 -4  api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
  28. +3 -3  api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
  29. +3 -3  api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
  30. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
  31. +1 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
  32. +79 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
  33. +7 -5  api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
  34. +1 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
  35. +11 -1  api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
  36. +11 -1  api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
  37. +57 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
  38. +10 -0  api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
  39. +11 -1  api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml

+ 51 - 0
api/core/model_runtime/model_providers/tongyi/llm/_position.yaml

@@ -0,0 +1,51 @@
+- qwen-vl-max-0809
+- qwen-vl-max-0201
+- qwen-vl-max
+- qwen-max-latest
+- qwen-max-1201
+- qwen-max-0919
+- qwen-max-0428
+- qwen-max-0403
+- qwen-max-0107
+- qwen-max
+- qwen-max-longcontext
+- qwen-plus-latest
+- qwen-plus-0919
+- qwen-plus-0806
+- qwen-plus-0723
+- qwen-plus-0624
+- qwen-plus-0206
+- qwen-plus-chat
+- qwen-plus
+- qwen-vl-plus-0809
+- qwen-vl-plus-0201
+- qwen-vl-plus
+- qwen-turbo-latest
+- qwen-turbo-0919
+- qwen-turbo-0624
+- qwen-turbo-0206
+- qwen-turbo-chat
+- qwen-turbo
+- qwen2.5-72b-instruct
+- qwen2.5-32b-instruct
+- qwen2.5-14b-instruct
+- qwen2.5-7b-instruct
+- qwen2.5-3b-instruct
+- qwen2.5-1.5b-instruct
+- qwen2.5-0.5b-instruct
+- qwen2.5-coder-7b-instruct
+- qwen2-math-72b-instruct
+- qwen2-math-7b-instruct
+- qwen2-math-1.5b-instruct
+- qwen-long
+- qwen-math-plus-latest
+- qwen-math-plus-0919
+- qwen-math-plus-0816
+- qwen-math-plus
+- qwen-math-turbo-latest
+- qwen-math-turbo-0919
+- qwen-math-turbo
+- qwen-coder-turbo-latest
+- qwen-coder-turbo-0919
+- qwen-coder-turbo
+- farui-plus
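
_position.yaml is new: it pins the display order of the Tongyi models in the provider's model list. A minimal sketch of how a position file like this can drive ordering (the loader and function names here are illustrative assumptions, not Dify's actual implementation):

import yaml

def load_position(path: str) -> list[str]:
    with open(path, encoding="utf-8") as f:
        return yaml.safe_load(f)  # the file is a plain list of model IDs

def sort_models(model_ids: list[str], position: list[str]) -> list[str]:
    order = {name: i for i, name in enumerate(position)}
    # Models missing from _position.yaml sort after the listed ones.
    return sorted(model_ids, key=lambda m: order.get(m, len(order)))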

File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml


File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml


File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml


+ 1 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml

@@ -1,3 +1,4 @@
+# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6
 model: qwen-long
 label:
   en_US: qwen-long

File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml


File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml


File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml


File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml


File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml


File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml


File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml


+ 1 - 1
api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml

@@ -8,7 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
-  context_size: 8192
+  context_size: 8000
 parameter_rules:
   - name: temperature
     use_template: temperature
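
Here and in the qwen-max-0403/0428 snapshots below, context_size drops from 8192 to 8000, matching the 8,000-token window that the Alibaba Cloud model docs (linked from qwen-long.yaml above) publish for these snapshots.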

+ 1 - 1
api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml

@@ -8,7 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
-  context_size: 8192
+  context_size: 8000
 parameter_rules:
   - name: temperature
     use_template: temperature

+ 1 - 1
api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml

@@ -8,7 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
-  context_size: 8192
+  context_size: 8000
 parameter_rules:
   - name: temperature
     use_template: temperature

File diff suppressed because it is too large
+ 81 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml


File diff suppressed because it is too large
+ 81 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml


+ 3 - 3
api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml

@@ -8,7 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
-  context_size: 32768
+  context_size: 32000
 parameter_rules:
   - name: temperature
     use_template: temperature
@@ -22,9 +22,9 @@ parameter_rules:
   - name: max_tokens
     use_template: max_tokens
     type: int
-    default: 2000
+    default: 8000
     min: 1
-    max: 2000
+    max: 8000
     help:
       zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
       en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.

+ 3 - 3
api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml

@@ -8,7 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
-  context_size: 8192
+  context_size: 8000
 parameter_rules:
   - name: temperature
     use_template: temperature
@@ -75,7 +75,7 @@ parameter_rules:
   - name: response_format
     use_template: response_format
 pricing:
-  input: '0.04'
-  output: '0.12'
+  input: '0.02'
+  output: '0.06'
   unit: '0.001'
   currency: RMB
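
Both qwen-max rates are halved here. In these pricing blocks, unit: '0.001' scales the per-token cost, so the input and output prices are effectively per 1K tokens; a quick check under that assumed formula (cost = tokens * price * unit):

tokens_in, tokens_out = 10_000, 2_000
cost = tokens_in * 0.02 * 0.001 + tokens_out * 0.06 * 0.001
print(f"{cost:.2f} RMB")  # 0.32 RMB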

+ 3 - 3
api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml

@@ -6,7 +6,7 @@ features:
   - agent-thought
 model_properties:
   mode: completion
-  context_size: 32768
+  context_size: 32000
 parameter_rules:
   - name: temperature
     use_template: temperature
@@ -20,9 +20,9 @@ parameter_rules:
   - name: max_tokens
     use_template: max_tokens
     type: int
-    default: 2000
+    default: 8000
     min: 1
-    max: 2000
+    max: 8000
     help:
       zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
       en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.

+ 3 - 3
api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml

@@ -6,7 +6,7 @@ features:
   - agent-thought
 model_properties:
   mode: completion
-  context_size: 32768
+  context_size: 32000
 parameter_rules:
   - name: temperature
     use_template: temperature
@@ -20,9 +20,9 @@ parameter_rules:
   - name: max_tokens
     use_template: max_tokens
     type: int
-    default: 2000
+    default: 8000
     min: 1
-    max: 2000
+    max: 8000
     help:
       zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
       en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.

+ 5 - 5
api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml

@@ -1,12 +1,12 @@
-model: qwen-plus-0806
+model: qwen-plus-0723
 label:
-  en_US: qwen-plus-0806
+  en_US: qwen-plus-0723
 model_type: llm
 features:
   - agent-thought
 model_properties:
   mode: completion
-  context_size: 32768
+  context_size: 32000
 parameter_rules:
   - name: temperature
     use_template: temperature
@@ -20,9 +20,9 @@ parameter_rules:
   - name: max_tokens
     use_template: max_tokens
     type: int
-    default: 2000
+    default: 8000
     min: 1
-    max: 2000
+    max: 8000
     help:
       zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
       en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
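
Note that the first hunk above also fixes a copy-paste slip: qwen-plus-0723.yaml previously declared model: qwen-plus-0806, so the schema's model ID and label now match the filename.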

+ 2 - 2
api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml

@@ -20,9 +20,9 @@ parameter_rules:
   - name: max_tokens
     use_template: max_tokens
     type: int
-    default: 2000
+    default: 8192
     min: 1
-    max: 2000
+    max: 8192
     help:
       zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
       en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.

File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml


+ 1 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml

@@ -79,3 +79,4 @@ pricing:
   output: '0.012'
   unit: '0.001'
   currency: RMB
+deprecated: true
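
This deprecated flag (also added to qwen-turbo-chat and qwen-vl-max-0201 below) marks legacy entries. A minimal sketch of how a consumer might hide such models; the function name and plain-dict access are illustrative assumptions, not Dify's actual API:

import yaml

def visible_models(schema_paths: list[str]) -> list[dict]:
    schemas = []
    for path in schema_paths:
        with open(path, encoding="utf-8") as f:
            schemas.append(yaml.safe_load(f))
    # Schemas without the flag default to "not deprecated".
    return [s for s in schemas if not s.get("deprecated", False)]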

File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml


+ 6 - 4
api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml

@@ -3,7 +3,9 @@ label:
   en_US: qwen-plus
 model_type: llm
 features:
+  - multi-tool-call
   - agent-thought
+  - stream-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -20,9 +22,9 @@ parameter_rules:
   - name: max_tokens
     use_template: max_tokens
     type: int
-    default: 2000
+    default: 8192
     min: 1
-    max: 2000
+    max: 8192
     help:
       zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
       en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
@@ -73,7 +75,7 @@ parameter_rules:
   - name: response_format
     use_template: response_format
 pricing:
-  input: '0.004'
-  output: '0.012'
+  input: '0.0008'
+  output: '0.002'
   unit: '0.001'
   currency: RMB
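
The two new feature flags advertise tool-calling support: multi-tool-call indicates the model can return several tool calls in one response, and stream-tool-call that tool-call arguments may arrive incrementally while streaming. The same pair is added to qwen-turbo below.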

+ 3 - 3
api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml

@@ -6,7 +6,7 @@ features:
   - agent-thought
 model_properties:
   mode: chat
-  context_size: 8192
+  context_size: 8000
 parameter_rules:
   - name: temperature
     use_template: temperature
@@ -20,9 +20,9 @@ parameter_rules:
   - name: max_tokens
     use_template: max_tokens
     type: int
-    default: 1500
+    default: 2000
     min: 1
-    max: 1500
+    max: 2000
     help:
       zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
       en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.

+ 3 - 3
api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml

@@ -6,7 +6,7 @@ features:
   - agent-thought
 model_properties:
   mode: chat
-  context_size: 8192
+  context_size: 8000
 parameter_rules:
   - name: temperature
     use_template: temperature
@@ -20,9 +20,9 @@ parameter_rules:
   - name: max_tokens
     use_template: max_tokens
     type: int
-    default: 1500
+    default: 2000
     min: 1
-    max: 1500
+    max: 2000
     help:
       zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
       en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.

File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml


+ 1 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml

@@ -79,3 +79,4 @@ pricing:
   output: '0.006'
   unit: '0.001'
   currency: RMB
+deprecated: true

File diff suppressed because it is too large
+ 79 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml


+ 7 - 5
api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml

@@ -3,10 +3,12 @@ label:
   en_US: qwen-turbo
 model_type: llm
 features:
+  - multi-tool-call
   - agent-thought
+  - stream-tool-call
 model_properties:
   mode: chat
-  context_size: 8192
+  context_size: 8000
 parameter_rules:
   - name: temperature
     use_template: temperature
@@ -20,9 +22,9 @@ parameter_rules:
   - name: max_tokens
     use_template: max_tokens
     type: int
-    default: 1500
+    default: 2000
     min: 1
-    max: 1500
+    max: 2000
     help:
       zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
       en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
@@ -73,7 +75,7 @@ parameter_rules:
   - name: response_format
     use_template: response_format
 pricing:
-  input: '0.002'
-  output: '0.006'
+  input: '0.0006'
+  output: '0.0003'
   unit: '0.001'
   currency: RMB

+ 1 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml

@@ -45,3 +45,4 @@ pricing:
   output: '0.02'
   unit: '0.001'
   currency: RMB
+deprecated: true

+ 11 - 1
api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml

@@ -7,7 +7,7 @@ features:
   - agent-thought
 model_properties:
   mode: chat
-  context_size: 32768
+  context_size: 32000
 parameter_rules:
   - name: top_p
     use_template: top_p
@@ -28,6 +28,16 @@ parameter_rules:
     help:
       zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
       en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: max_tokens
+    required: false
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
   - name: seed
     required: false
     type: int

+ 11 - 1
api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml

@@ -7,7 +7,7 @@ features:
   - agent-thought
 model_properties:
   mode: chat
-  context_size: 32768
+  context_size: 32000
 parameter_rules:
   - name: top_p
     use_template: top_p
@@ -28,6 +28,16 @@ parameter_rules:
     help:
       zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
       en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: max_tokens
+    required: false
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
   - name: seed
     required: false
     type: int

+ 57 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml

@@ -0,0 +1,57 @@
+model: qwen-vl-plus-0201
+label:
+  en_US: qwen-vl-plus-0201
+model_type: llm
+features:
+  - vision
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8000
+parameter_rules:
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: max_tokens
+    required: false
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.02'
+  output: '0.02'
+  unit: '0.001'
+  currency: RMB

+ 10 - 0
api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml

@@ -28,6 +28,16 @@ parameter_rules:
     help:
       zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
       en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: max_tokens
+    required: false
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
   - name: seed
     required: false
     type: int

+ 11 - 1
api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml

@@ -7,7 +7,7 @@ features:
   - agent-thought
 model_properties:
   mode: chat
-  context_size: 8192
+  context_size: 8000
 parameter_rules:
   - name: top_p
     use_template: top_p
@@ -28,6 +28,16 @@ parameter_rules:
     help:
       zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
       en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: max_tokens
+    required: false
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
   - name: seed
     required: false
     type: int
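
The max_tokens rule added to each qwen-vl schema above bounds generation length with a default, a minimum, and a maximum. A minimal sketch of applying such a rule (the function name and plain-dict access are illustrative assumptions):

def apply_max_tokens(requested: int | None, rule: dict) -> int:
    # Fall back to the rule's default when the caller omits the value,
    # then clamp into the rule's [min, max] range.
    value = rule["default"] if requested is None else requested
    return max(rule["min"], min(rule["max"], value))

rule = {"default": 2000, "min": 1, "max": 2000}
print(apply_max_tokens(None, rule))   # 2000
print(apply_max_tokens(5000, rule))   # 2000 (clamped to max)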