Переглянути джерело

feat: support more model types and builtin tools on aws/sagemaker (#8061)

Co-authored-by: Yuanbo Li <ybalbert@amazon.com>
ybalbert001 7 місяців тому
батько
коміт
954580a4af

+ 274 - 43
api/core/model_runtime/model_providers/sagemaker/llm/llm.py

@@ -1,17 +1,36 @@
 import json
 import logging
-from collections.abc import Generator
-from typing import Any, Optional, Union
+import re
+from collections.abc import Generator, Iterator
+from typing import Any, Optional, Union, cast
 
+# from openai.types.chat import ChatCompletion, ChatCompletionChunk
 import boto3
+from sagemaker import Predictor, serializers
+from sagemaker.session import Session
 
-from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
 from core.model_runtime.entities.message_entities import (
     AssistantPromptMessage,
+    ImagePromptMessageContent,
     PromptMessage,
+    PromptMessageContent,
+    PromptMessageContentType,
     PromptMessageTool,
+    SystemPromptMessage,
+    ToolPromptMessage,
+    UserPromptMessage,
+)
+from core.model_runtime.entities.model_entities import (
+    AIModelEntity,
+    FetchFrom,
+    I18nObject,
+    ModelFeature,
+    ModelPropertyKey,
+    ModelType,
+    ParameterRule,
+    ParameterType,
 )
-from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, I18nObject, ModelType
 from core.model_runtime.errors.invoke import (
     InvokeAuthorizationError,
     InvokeBadRequestError,
@@ -25,12 +44,140 @@ from core.model_runtime.model_providers.__base.large_language_model import Large
 
 logger = logging.getLogger(__name__)
 
+def inference(predictor, messages:list[dict[str,Any]], params:dict[str,Any], stop:list, stream=False):
+    """
+    Build a chat completion payload and send it through the given predictor.
+
+    :param predictor: SageMaker Predictor used to call the endpoint
+    :param messages: message list, for example
+                messages = [
+                {"role": "system", "content":"please answer in Chinese"},
+                {"role": "user", "content": "who are you? what are you doing?"},
+            ]
+    :param params: model parameters for the LLM; only `model_name`, `max_new_tokens`,
+        `max_tokens`, `temperature` and `top_p` are read here
+    :param stop: stop sequences, forwarded unchanged in the payload
+    :param stream: False by default
+    :return: result of `predictor.predict` if stream is False,
+        result of `predictor.predict_stream` if stream is True
+    """
+    payload = {
+        "model" : params.get('model_name'),
+        "stop" : stop,
+        "messages": messages,
+        "stream" : stream,
+        # `max_new_tokens` takes precedence over `max_tokens`; 2048 is used when neither is set
+        "max_tokens" : params.get('max_new_tokens', params.get('max_tokens', 2048)),
+        "temperature" : params.get('temperature', 0.1),
+        "top_p" : params.get('top_p', 0.9),
+    }
+
+    if not stream:
+        response = predictor.predict(payload)
+        return response
+    else:
+        response_stream = predictor.predict_stream(payload)
+        return response_stream
 
 class SageMakerLargeLanguageModel(LargeLanguageModel):
     """
     Model class for Cohere large language model.
     """
     sagemaker_client: Any = None
+    sagemaker_sess : Any = None
+    predictor : Any = None
+
+    def _handle_chat_generate_response(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
+                                       tools: list[PromptMessageTool],
+                                       resp: bytes) -> LLMResult:
+        """
+        Handle normal (non-stream) chat generate response.
+
+        :param model: model name
+        :param credentials: model credentials
+        :param prompt_messages: prompt messages, used for usage calculation
+        :param tools: tools for tool calling, used for usage calculation
+        :param resp: raw UTF-8 encoded JSON response body
+        :return: full response
+        :raises InvokeServerUnavailableError: if the response carries no message content
+        """
+        resp_obj = json.loads(resp.decode('utf-8'))
+        # A missing `choices` / `message` / `content` (or a null content) is reported as
+        # an empty response instead of surfacing as a TypeError or IndexError.
+        choices = resp_obj.get('choices') or []
+        message = (choices[0].get('message') or {}) if choices else {}
+        resp_str = message.get('content')
+
+        if not resp_str:
+            raise InvokeServerUnavailableError("Empty response")
+
+        assistant_prompt_message = AssistantPromptMessage(
+            content=resp_str,
+            tool_calls=[]
+        )
+
+        prompt_tokens = self._num_tokens_from_messages(messages=prompt_messages, tools=tools)
+        completion_tokens = self._num_tokens_from_messages(messages=[assistant_prompt_message], tools=tools)
+
+        usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens,
+                                          completion_tokens=completion_tokens)
+
+        response = LLMResult(
+            model=model,
+            prompt_messages=prompt_messages,
+            system_fingerprint=None,
+            usage=usage,
+            message=assistant_prompt_message,
+        )
+
+        return response
+
+    def _handle_chat_stream_response(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
+                                     tools: list[PromptMessageTool],
+                                     resp: Iterator[bytes]) -> Generator:
+        """
+        Handle stream chat generate response.
+
+        The stream is parsed as `data: <json>` events, each terminated by a blank line.
+        One LLMResultChunk is yielded per event whose delta carries a `content` key; the
+        chunk with a non-null `finish_reason` also carries the usage.
+
+        :param model: model name
+        :param credentials: model credentials
+        :param prompt_messages: prompt messages, used for usage calculation
+        :param tools: tools for tool calling, used for usage calculation
+        :param resp: iterator over raw response bytes
+        :return: generator of LLMResultChunk
+        """
+        # Buffer raw bytes: decoding each network chunk separately breaks when a
+        # multi-byte UTF-8 character is split across two chunks.
+        # MULTILINE lets `^` match every buffered event instead of only the first one.
+        event_pattern = re.compile(rb'^data:\s*(.+?)(\n\n)', re.MULTILINE)
+        full_response = ''
+        buffer = b''
+        for chunk_bytes in resp:
+            buffer += chunk_bytes
+            last_idx = 0
+            for match in event_pattern.finditer(buffer):
+                # Consume every complete event, parsable or not, so that a malformed
+                # event (or a non-JSON sentinel such as `[DONE]`) cannot stall the buffer.
+                last_idx = match.end()
+                event_payload = match.group(1).strip()
+                try:
+                    choice = json.loads(event_payload)["choices"][0]
+                    if "content" not in choice["delta"]:
+                        continue
+                    chunk_content = choice["delta"]["content"]
+                    finish_reason = choice['finish_reason']
+                except (json.JSONDecodeError, UnicodeDecodeError, KeyError, IndexError):
+                    logger.info("json parse exception, content: %s",
+                                event_payload.decode('utf-8', errors='replace'))
+                    continue
+
+                # Accumulate before computing usage so the last delta is counted too;
+                # a null content contributes nothing.
+                full_response += chunk_content or ''
+                assistant_prompt_message = AssistantPromptMessage(
+                    content=chunk_content,
+                    tool_calls=[]
+                )
+
+                if finish_reason is None:
+                    yield LLMResultChunk(
+                        model=model,
+                        prompt_messages=prompt_messages,
+                        system_fingerprint=None,
+                        delta=LLMResultChunkDelta(
+                            index=0,
+                            message=assistant_prompt_message
+                        ),
+                    )
+                    continue
+
+                # Final chunk: report usage for the whole accumulated completion.
+                temp_assistant_prompt_message = AssistantPromptMessage(
+                    content=full_response,
+                    tool_calls=[]
+                )
+                prompt_tokens = self._num_tokens_from_messages(messages=prompt_messages, tools=tools)
+                completion_tokens = self._num_tokens_from_messages(messages=[temp_assistant_prompt_message], tools=[])
+                usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
+
+                yield LLMResultChunk(
+                    model=model,
+                    prompt_messages=prompt_messages,
+                    system_fingerprint=None,
+                    delta=LLMResultChunkDelta(
+                        index=0,
+                        message=assistant_prompt_message,
+                        finish_reason=finish_reason,
+                        usage=usage
+                    ),
+                )
+
+            # Keep only the unconsumed tail (an incomplete event, if any).
+            buffer = buffer[last_idx:]
 
     def _invoke(self, model: str, credentials: dict,
                 prompt_messages: list[PromptMessage], model_parameters: dict,
@@ -50,9 +197,6 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
         :param user: unique user id
         :return: full response or stream response chunk generator result
         """
-        # get model mode
-        model_mode = self.get_model_mode(model, credentials)
-
         if not self.sagemaker_client:
             access_key = credentials.get('access_key')
             secret_key = credentials.get('secret_key')
@@ -68,37 +212,132 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
             else:
                 self.sagemaker_client = boto3.client("sagemaker-runtime")
 
+            sagemaker_session = Session(sagemaker_runtime_client=self.sagemaker_client)
+            self.predictor = Predictor(
+                endpoint_name=credentials.get('sagemaker_endpoint'),
+                sagemaker_session=sagemaker_session,
+                serializer=serializers.JSONSerializer(),
+            )
 
-        sagemaker_endpoint = credentials.get('sagemaker_endpoint')
-        response_model = self.sagemaker_client.invoke_endpoint(
-                    EndpointName=sagemaker_endpoint,
-                    Body=json.dumps(
-                    {
-                        "inputs": prompt_messages[0].content,
-                        "parameters": { "stop" : stop},
-                        "history" : []
-                    }
-                    ),
-                    ContentType="application/json",
-                )
 
-        assistant_text = response_model['Body'].read().decode('utf8')
+        messages:list[dict[str,Any]] = [ {"role": p.role.value, "content": p.content} for p in prompt_messages ]
+        response = inference(predictor=self.predictor, messages=messages, params=model_parameters, stop=stop, stream=stream)
 
-        # transform assistant message to prompt message
-        assistant_prompt_message = AssistantPromptMessage(
-            content=assistant_text
-        )
+        if stream:
+            if tools and len(tools) > 0:
+                raise InvokeBadRequestError(f"{model}'s tool calls does not support stream mode")
 
-        usage = self._calc_response_usage(model, credentials, 0, 0)
+            return self._handle_chat_stream_response(model=model, credentials=credentials,
+                                                     prompt_messages=prompt_messages,
+                                                     tools=tools, resp=response)
+        return self._handle_chat_generate_response(model=model, credentials=credentials,
+                                                   prompt_messages=prompt_messages,
+                                                   tools=tools, resp=response)
 
-        response = LLMResult(
-            model=model,
-            prompt_messages=prompt_messages,
-            message=assistant_prompt_message,
-            usage=usage
-        )
+    def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict:
+        """
+        Convert PromptMessage to dict for OpenAI Compatibility API
+
+        :param message: user, assistant, system or tool prompt message
+        :return: dict with a `role` and `content` (plus `function_call` or
+            `tool_call_id` where applicable)
+        :raises ValueError: if the message is of none of the four handled types
+        """
+        if isinstance(message, UserPromptMessage):
+            message = cast(UserPromptMessage, message)
+            if isinstance(message.content, str):
+                message_dict = {"role": "user", "content": message.content}
+            else:
+                # Multi-part content: only TEXT and IMAGE parts are converted,
+                # parts of any other type are dropped.
+                sub_messages = []
+                for message_content in message.content:
+                    if message_content.type == PromptMessageContentType.TEXT:
+                        message_content = cast(PromptMessageContent, message_content)
+                        sub_message_dict = {
+                            "type": "text",
+                            "text": message_content.data
+                        }
+                        sub_messages.append(sub_message_dict)
+                    elif message_content.type == PromptMessageContentType.IMAGE:
+                        message_content = cast(ImagePromptMessageContent, message_content)
+                        sub_message_dict = {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": message_content.data,
+                                "detail": message_content.detail.value
+                            }
+                        }
+                        sub_messages.append(sub_message_dict)
+                message_dict = {"role": "user", "content": sub_messages}
+        elif isinstance(message, AssistantPromptMessage):
+            message = cast(AssistantPromptMessage, message)
+            message_dict = {"role": "assistant", "content": message.content}
+            if message.tool_calls and len(message.tool_calls) > 0:
+                # Only the first tool call is serialized, as a `function_call` entry.
+                message_dict["function_call"] = {
+                    "name": message.tool_calls[0].function.name,
+                    "arguments": message.tool_calls[0].function.arguments
+                }
+        elif isinstance(message, SystemPromptMessage):
+            message = cast(SystemPromptMessage, message)
+            message_dict = {"role": "system", "content": message.content}
+        elif isinstance(message, ToolPromptMessage):
+            message = cast(ToolPromptMessage, message)
+            message_dict = {"tool_call_id": message.tool_call_id, "role": "tool", "content": message.content}
+        else:
+            raise ValueError(f"Unknown message type {type(message)}")
+
+        return message_dict
+
+    def _num_tokens_from_messages(self, messages: list[PromptMessage], tools: list[PromptMessageTool],
+                                  is_completion_model: bool = False) -> int:
+        """
+        Approximate the number of tokens used by the messages with the GPT-2 tokenizer.
+
+        :param messages: prompt messages
+        :param tools: tools for tool calling; their tokens are added when given
+        :param is_completion_model: when True, only the message contents are counted
+        :return: approximate token count
+        """
+        def tokens(text: str):
+            return self._get_num_tokens_by_gpt2(text)
+
+        if is_completion_model:
+            return sum(tokens(str(message.content)) for message in messages)
+
+        tokens_per_message = 3
+        tokens_per_name = 1
+
+        num_tokens = 0
+        messages_dict = [self._convert_prompt_message_to_dict(m) for m in messages]
+        for message in messages_dict:
+            num_tokens += tokens_per_message
+            for key, value in message.items():
+                if isinstance(value, list):
+                    # Multi-part content: only the text parts are counted.
+                    text = ''
+                    for item in value:
+                        if isinstance(item, dict) and item['type'] == 'text':
+                            text += item['text']
+
+                    value = text
+
+                if key == "tool_calls":
+                    for tool_call in value:
+                        for t_key, t_value in tool_call.items():
+                            num_tokens += tokens(t_key)
+                            if t_key == "function":
+                                for f_key, f_value in t_value.items():
+                                    num_tokens += tokens(f_key)
+                                    num_tokens += tokens(f_value)
+                            else:
+                                num_tokens += tokens(t_key)
+                                num_tokens += tokens(t_value)
+                # `elif` keeps the three branches exclusive, so a `tool_calls` value is
+                # not counted a second time as a plain string by the `else` below.
+                elif key == "function_call":
+                    for t_key, t_value in value.items():
+                        num_tokens += tokens(t_key)
+                        if t_key == "function":
+                            for f_key, f_value in t_value.items():
+                                num_tokens += tokens(f_key)
+                                num_tokens += tokens(f_value)
+                        else:
+                            num_tokens += tokens(t_key)
+                            num_tokens += tokens(t_value)
+                else:
+                    num_tokens += tokens(str(value))
+
+                if key == "name":
+                    num_tokens += tokens_per_name
+        num_tokens += 3
+
+        if tools:
+            num_tokens += self._num_tokens_for_tools(tools)
+
+        return num_tokens
 
     def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                        tools: Optional[list[PromptMessageTool]] = None) -> int:
@@ -112,10 +351,8 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
         :return:
         """
         # get model mode
-        model_mode = self.get_model_mode(model)
-
         try:
-            return 0
+            return self._num_tokens_from_messages(prompt_messages, tools)
         except Exception as e:
             raise self._transform_invoke_error(e)
 
@@ -129,7 +366,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
         """
         try:
             # get model mode
-            model_mode = self.get_model_mode(model)
+            pass
         except Exception as ex:
             raise CredentialsValidateFailedError(str(ex))
 
@@ -200,13 +437,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
             )
         ]
 
-        completion_type = LLMMode.value_of(credentials["mode"])
-
-        if completion_type == LLMMode.CHAT:
-            print(f"completion_type : {LLMMode.CHAT.value}") 
-
-        if completion_type == LLMMode.COMPLETION:
-            print(f"completion_type : {LLMMode.COMPLETION.value}") 
+        completion_type = LLMMode.value_of(credentials["mode"]).value
 
         features = []
 

+ 1 - 1
api/core/model_runtime/model_providers/sagemaker/rerank/rerank.py

@@ -22,7 +22,7 @@ logger = logging.getLogger(__name__)
 
 class SageMakerRerankModel(RerankModel):
     """
-    Model class for Cohere rerank model.
+    Model class for SageMaker rerank model.
     """
     sagemaker_client: Any = None
 

+ 27 - 1
api/core/model_runtime/model_providers/sagemaker/sagemaker.py

@@ -1,10 +1,11 @@
 import logging
+import uuid
+from typing import IO, Any
 
 from core.model_runtime.model_providers.__base.model_provider import ModelProvider
 
 logger = logging.getLogger(__name__)
 
-
 class SageMakerProvider(ModelProvider):
     def validate_provider_credentials(self, credentials: dict) -> None:
         """
@@ -15,3 +16,28 @@ class SageMakerProvider(ModelProvider):
         :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
         """
         pass
+
+def buffer_to_s3(s3_client:Any, file: IO[bytes], bucket:str, s3_prefix:str) -> str:
+    '''
+        Upload the file to S3 and return the object key (not a full s3:// URI).
+
+        The key is `s3_prefix` followed by a random UUID and the `.mp3` suffix; the
+        object is always stored with content type `audio/mp3`.
+    '''
+    s3_key = f'{s3_prefix}{uuid.uuid4()}.mp3'
+    s3_client.put_object(
+        Body=file.read(),
+        Bucket=bucket,
+        Key=s3_key,
+        ContentType='audio/mp3'
+    )
+    return s3_key
+
+def generate_presigned_url(s3_client:Any, file: IO[bytes], bucket_name:str, s3_prefix:str, expiration=600) -> str:
+    """
+    Upload the file to S3 and return a presigned GET URL for the uploaded object.
+
+    :param s3_client: S3 client used for the upload and for signing
+    :param file: binary file object to upload
+    :param bucket_name: target bucket
+    :param s3_prefix: key prefix of the uploaded object
+    :param expiration: lifetime of the URL, passed as `ExpiresIn`
+    :return: the presigned URL, or None when it could not be generated
+    """
+    object_key = buffer_to_s3(s3_client, file, bucket_name, s3_prefix)
+    try:
+        return s3_client.generate_presigned_url('get_object',
+                                                Params={'Bucket': bucket_name, 'Key': object_key},
+                                                ExpiresIn=expiration)
+    except Exception:
+        # Best effort: the failure is logged with its traceback through the module
+        # logger (instead of printed to stdout) and the caller receives None.
+        logger.exception("Error generating presigned URL")
+        return None

+ 73 - 5
api/core/model_runtime/model_providers/sagemaker/sagemaker.yaml

@@ -21,6 +21,8 @@ supported_model_types:
   - llm
   - text-embedding
   - rerank
+  - speech2text
+  - tts
 configurate_methods:
   - customizable-model
 model_credential_schema:
@@ -45,14 +47,10 @@ model_credential_schema:
         zh_Hans: 选择对话类型
         en_US: Select completion mode
       options:
-        - value: completion
-          label:
-            en_US: Completion
-            zh_Hans: 补全
         - value: chat
           label:
             en_US: Chat
-            zh_Hans: 对话
+            zh_Hans: Chat
     - variable: sagemaker_endpoint
       label:
         en_US: sagemaker endpoint
@@ -61,6 +59,76 @@ model_credential_schema:
       placeholder:
         zh_Hans: 请输出你的Sagemaker推理端点
         en_US: Enter your Sagemaker Inference endpoint
+    - variable: audio_s3_cache_bucket
+      show_on:
+        - variable: __model_type
+          value: speech2text
+      label:
+        zh_Hans: 音频缓存桶(s3 bucket)
+        en_US: audio cache bucket(s3 bucket)
+      type: text-input
+      required: true
+      placeholder:
+        zh_Hans: sagemaker-us-east-1-******207838
+        en_US: sagemaker-us-east-1-*******7838
+    - variable: audio_model_type
+      show_on:
+        - variable: __model_type
+          value: tts
+      label:
+        en_US: Audio model type
+      type: select
+      required: true
+      placeholder:
+        zh_Hans: 语音模型类型
+        en_US: Audio model type
+      options:
+        - value: PresetVoice
+          label:
+            en_US: preset voice
+            zh_Hans: 内置音色
+        - value: CloneVoice
+          label:
+            en_US: clone voice
+            zh_Hans: 克隆音色
+        - value: CloneVoice_CrossLingual
+          label:
+            en_US: crosslingual clone voice
+            zh_Hans: 跨语种克隆音色
+        - value: InstructVoice
+          label:
+            en_US: Instruct voice
+            zh_Hans: 文字指令音色
+    - variable: prompt_audio
+      show_on:
+        - variable: __model_type
+          value: tts
+      label:
+        en_US: Mock Audio Source
+      type: text-input
+      required: false
+      placeholder:
+        zh_Hans: 被模仿的音色音频
+        en_US: source audio to be mocked
+    - variable: prompt_text
+      show_on:
+        - variable: __model_type
+          value: tts
+      label:
+        en_US: Prompt Audio Text
+      type: text-input
+      required: false
+      placeholder:
+        zh_Hans: 模仿音色的对应文本
+        en_US: text for the mocked source audio
+    - variable: instruct_text
+      show_on:
+        - variable: __model_type
+          value: tts
+      label:
+        en_US: instruct text for speaker
+      type: text-input
+      required: false
     - variable: aws_access_key_id
       required: false
       label:

+ 0 - 0
api/core/model_runtime/model_providers/sagemaker/speech2text/__init__.py


+ 142 - 0
api/core/model_runtime/model_providers/sagemaker/speech2text/speech2text.py

@@ -0,0 +1,142 @@
+import json
+import logging
+from typing import IO, Any, Optional
+
+import boto3
+
+from core.model_runtime.entities.common_entities import I18nObject
+from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType
+from core.model_runtime.errors.invoke import (
+    InvokeAuthorizationError,
+    InvokeBadRequestError,
+    InvokeConnectionError,
+    InvokeError,
+    InvokeRateLimitError,
+    InvokeServerUnavailableError,
+)
+from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
+from core.model_runtime.model_providers.sagemaker.sagemaker import generate_presigned_url
+
+logger = logging.getLogger(__name__)
+
+class SageMakerSpeech2TextModel(Speech2TextModel):
+    """
+    Model class for SageMaker speech to text model.
+    """
+    sagemaker_client: Any = None
+    s3_client : Any = None
+
+    def _invoke(self, model: str, credentials: dict,
+                file: IO[bytes], user: Optional[str] = None) \
+            -> str:
+        """
+        Invoke speech2text model
+
+        The audio is uploaded to the configured S3 cache bucket and the endpoint is
+        called with a presigned URL of the uploaded object.
+
+        :param model: model name
+        :param credentials: model credentials
+        :param file: audio file
+        :param user: unique user id
+        :return: text for given audio file, or None when the invocation failed
+        """
+        asr_text = None
+
+        try:
+            if not self.sagemaker_client:
+                access_key = credentials.get('aws_access_key_id')
+                secret_key = credentials.get('aws_secret_access_key')
+                aws_region = credentials.get('aws_region')
+                if aws_region:
+                    if access_key and secret_key:
+                        self.sagemaker_client = boto3.client("sagemaker-runtime",
+                            aws_access_key_id=access_key,
+                            aws_secret_access_key=secret_key,
+                            region_name=aws_region)
+                        self.s3_client = boto3.client("s3",
+                            aws_access_key_id=access_key,
+                            aws_secret_access_key=secret_key,
+                            region_name=aws_region)
+                    else:
+                        self.sagemaker_client = boto3.client("sagemaker-runtime", region_name=aws_region)
+                        self.s3_client = boto3.client("s3", region_name=aws_region)
+                else:
+                    self.sagemaker_client = boto3.client("sagemaker-runtime")
+                    self.s3_client = boto3.client("s3")
+
+            s3_prefix='dify/speech2text/'
+            sagemaker_endpoint = credentials.get('sagemaker_endpoint')
+            bucket = credentials.get('audio_s3_cache_bucket')
+
+            s3_presign_url = generate_presigned_url(self.s3_client, file, bucket, s3_prefix)
+            payload = {
+                "audio_s3_presign_uri" : s3_presign_url
+            }
+
+            response_model = self.sagemaker_client.invoke_endpoint(
+                EndpointName=sagemaker_endpoint,
+                Body=json.dumps(payload),
+                ContentType="application/json"
+            )
+            json_str = response_model['Body'].read().decode('utf8')
+            json_obj = json.loads(json_str)
+            asr_text = json_obj['text']
+        except Exception as e:
+            # The previous message formatted an undefined name (`line`), which raised
+            # NameError inside this handler and hid the original failure.
+            logger.exception('Exception %s', e)
+
+        return asr_text
+
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        """
+        Validate model credentials
+
+        :param model: model name
+        :param credentials: model credentials
+        :return:
+        """
+        pass
+
+    @property
+    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+        """
+        Map model invoke error to unified error
+        The key is the error type thrown to the caller
+        The value is the error type thrown by the model,
+        which needs to be converted into a unified error type for the caller.
+
+        :return: Invoke error mapping
+        """
+        return {
+            InvokeConnectionError: [
+                InvokeConnectionError
+            ],
+            InvokeServerUnavailableError: [
+                InvokeServerUnavailableError
+            ],
+            InvokeRateLimitError: [
+                InvokeRateLimitError
+            ],
+            InvokeAuthorizationError: [
+                InvokeAuthorizationError
+            ],
+            InvokeBadRequestError: [
+                InvokeBadRequestError,
+                KeyError,
+                ValueError
+            ]
+        }
+
+    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
+        """
+            used to define customizable model schema
+        """
+        entity = AIModelEntity(
+            model=model,
+            label=I18nObject(
+                en_US=model
+            ),
+            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+            model_type=ModelType.SPEECH2TEXT,
+            model_properties={ },
+            parameter_rules=[]
+        )
+
+        return entity

+ 0 - 0
api/core/model_runtime/model_providers/sagemaker/tts/__init__.py


+ 287 - 0
api/core/model_runtime/model_providers/sagemaker/tts/tts.py

@@ -0,0 +1,287 @@
+import concurrent.futures
+import copy
+import json
+import logging
+from enum import Enum
+from typing import Any, Optional
+
+import boto3
+import requests
+
+from core.model_runtime.entities.common_entities import I18nObject
+from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType
+from core.model_runtime.errors.invoke import (
+    InvokeAuthorizationError,
+    InvokeBadRequestError,
+    InvokeConnectionError,
+    InvokeError,
+    InvokeRateLimitError,
+    InvokeServerUnavailableError,
+)
+from core.model_runtime.model_providers.__base.tts_model import TTSModel
+
+logger = logging.getLogger(__name__)
+
+class TTSModelType(Enum):
+    # Synthesis modes understood by the payload builder; the string values are
+    # what is compared against the configured ``audio_model_type``.
+
+    # Built-in voice: requires a voice/role name.
+    PresetVoice = "PresetVoice"
+    # Voice cloning: requires both a prompt text and a prompt audio.
+    CloneVoice = "CloneVoice"
+    # Cross-lingual cloning: requires a prompt audio; a language tag is detected for the text.
+    CloneVoice_CrossLingual = "CloneVoice_CrossLingual"
+    # Instructed voice: requires an instruct text and a voice/role name.
+    InstructVoice = "InstructVoice"
+
+class SageMakerText2SpeechModel(TTSModel):
+    """
+    Text-to-speech model served by a SageMaker endpoint.
+
+    The endpoint is invoked with a JSON payload and is expected to answer with
+    a JSON object whose ``s3_presign_url`` points at the synthesized audio.
+    That audio is downloaded and handed to the caller in 1024-byte chunks.
+    """
+
+    # boto3 clients; created on the first ``_invoke`` call and reused afterwards.
+    sagemaker_client: Any = None
+    s3_client: Any = None
+    comprehend_client: Any = None
+
+    def __init__(self):
+        # preset voices, need support custom voice
+        self.model_voices = {
+            '__default': {
+                'all': [
+                    {'name': 'Default', 'value': 'default'},
+                ]
+            },
+            'CosyVoice': {
+                'zh-Hans': [
+                    {'name': '中文男', 'value': '中文男'},
+                    {'name': '中文女', 'value': '中文女'},
+                    {'name': '粤语女', 'value': '粤语女'},
+                ],
+                'zh-Hant': [
+                    {'name': '中文男', 'value': '中文男'},
+                    {'name': '中文女', 'value': '中文女'},
+                    {'name': '粤语女', 'value': '粤语女'},
+                ],
+                'en-US': [
+                    {'name': '英文男', 'value': '英文男'},
+                    {'name': '英文女', 'value': '英文女'},
+                ],
+                'ja-JP': [
+                    {'name': '日语男', 'value': '日语男'},
+                ],
+                'ko-KR': [
+                    {'name': '韩语女', 'value': '韩语女'},
+                ]
+            }
+        }
+
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        """
+        Validate model credentials (currently a no-op).
+
+        :param model: model name
+        :param credentials: model credentials
+        :return:
+        """
+        pass
+
+    def _detect_lang_code(self, content: str, map_dict: Optional[dict] = None):
+        """
+        Detect the dominant language of ``content`` and return its language tag.
+
+        :param content: text whose language is detected via Amazon Comprehend
+        :param map_dict: optional mapping from Comprehend language code to tag;
+            the built-in mapping is used when omitted
+        :return: the mapped tag, or ``'<|zh|>'`` when the language is unknown
+            or nothing was detected
+        """
+        # Previously the argument was overwritten unconditionally, so a
+        # caller-supplied mapping was silently ignored.
+        if map_dict is None:
+            map_dict = {
+                "zh" : "<|zh|>",
+                "en" : "<|en|>",
+                "ja" : "<|jp|>",
+                "zh-TW" : "<|yue|>",
+                "ko" : "<|ko|>"
+            }
+
+        response = self.comprehend_client.detect_dominant_language(Text=content)
+        languages = response.get('Languages') or []
+        if not languages:
+            # Nothing detected: fall back to the same default as an unmapped code.
+            return '<|zh|>'
+
+        return map_dict.get(languages[0]['LanguageCode'], '<|zh|>')
+
+    def _build_tts_payload(self, model_type:str, content_text:str, model_role:str, prompt_text:str, prompt_audio:str, instruct_text:str):
+        """
+        Build the endpoint payload for the given synthesis mode.
+
+        :param model_type: one of the ``TTSModelType`` values
+        :param content_text: text to synthesize
+        :param model_role: voice/role name (PresetVoice, InstructVoice)
+        :param prompt_text: transcript of the prompt audio (CloneVoice)
+        :param prompt_audio: prompt audio reference (CloneVoice, CloneVoice_CrossLingual)
+        :param instruct_text: voice instruction (InstructVoice)
+        :return: payload dict for the endpoint
+        :raises RuntimeError: when the mode is unknown or a required value is missing
+        """
+        if model_type == TTSModelType.PresetVoice.value and model_role:
+            return { "tts_text" : content_text, "role" : model_role }
+        if model_type == TTSModelType.CloneVoice.value and prompt_text and prompt_audio:
+            return { "tts_text" : content_text, "prompt_text": prompt_text, "prompt_audio" : prompt_audio }
+        if model_type ==  TTSModelType.CloneVoice_CrossLingual.value and prompt_audio:
+            lang_tag = self._detect_lang_code(content_text)
+            return { "tts_text" : f"{content_text}", "prompt_audio" : prompt_audio, "lang_tag" : lang_tag }
+        if model_type ==  TTSModelType.InstructVoice.value and instruct_text and model_role:
+            return { "tts_text" : content_text, "role" : model_role, "instruct_text" : instruct_text }
+
+        raise RuntimeError(f"Invalid params for {model_type}")
+
+    def _invoke(self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str,
+                user: Optional[str] = None):
+        """
+        Invoke the text2speech model.
+
+        :param model: model name
+        :param tenant_id: user tenant id
+        :param credentials: model credentials (AWS settings, endpoint name and
+            the voice-mode options)
+        :param content_text: text content to be synthesized
+        :param voice: model timbre
+        :param user: unique user id
+        :return: generator yielding the synthesized audio in byte chunks
+        """
+        if not self.sagemaker_client:
+            access_key = credentials.get('aws_access_key_id')
+            secret_key = credentials.get('aws_secret_access_key')
+            aws_region = credentials.get('aws_region')
+
+            # Explicit keys are only used together with a region; in every
+            # other case boto3 resolves credentials/region on its own.
+            client_kwargs = {}
+            if aws_region:
+                client_kwargs['region_name'] = aws_region
+                if access_key and secret_key:
+                    client_kwargs['aws_access_key_id'] = access_key
+                    client_kwargs['aws_secret_access_key'] = secret_key
+
+            self.sagemaker_client = boto3.client("sagemaker-runtime", **client_kwargs)
+            self.s3_client = boto3.client("s3", **client_kwargs)
+            self.comprehend_client = boto3.client('comprehend', **client_kwargs)
+
+        model_type = credentials.get('audio_model_type', 'PresetVoice')
+        prompt_text = credentials.get('prompt_text')
+        prompt_audio = credentials.get('prompt_audio')
+        instruct_text = credentials.get('instruct_text')
+        sagemaker_endpoint = credentials.get('sagemaker_endpoint')
+        payload = self._build_tts_payload(
+            model_type,
+            content_text,
+            voice,
+            prompt_text,
+            prompt_audio,
+            instruct_text
+        )
+
+        return self._tts_invoke_streaming(model_type, payload, sagemaker_endpoint)
+
+    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
+        """
+        Build the schema entity for a customizable TTS model.
+
+        :param model: model name, also used as the English label
+        :param credentials: model credentials (unused)
+        :return: the model entity
+        """
+        entity = AIModelEntity(
+            model=model,
+            label=I18nObject(
+                en_US=model
+            ),
+            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+            model_type=ModelType.TTS,
+            model_properties={},
+            parameter_rules=[]
+        )
+
+        return entity
+
+    @property
+    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+        """
+        Map model invoke error to unified error
+        The key is the error type thrown to the caller
+        The value is the error type thrown by the model,
+        which needs to be converted into a unified error type for the caller.
+
+        :return: Invoke error mapping
+        """
+        return {
+            InvokeConnectionError: [
+                InvokeConnectionError
+            ],
+            InvokeServerUnavailableError: [
+                InvokeServerUnavailableError
+            ],
+            InvokeRateLimitError: [
+                InvokeRateLimitError
+            ],
+            InvokeAuthorizationError: [
+                InvokeAuthorizationError
+            ],
+            InvokeBadRequestError: [
+                InvokeBadRequestError,
+                KeyError,
+                ValueError
+            ]
+        }
+
+    def _get_model_default_voice(self, model: str, credentials: dict) -> Any:
+        """Return the default voice; this provider declares none (empty string)."""
+        return ""
+
+    def _get_model_word_limit(self, model: str, credentials: dict) -> int:
+        """Return the maximum text length sent to the endpoint in one request."""
+        return 15
+
+    def _get_model_audio_type(self, model: str, credentials: dict) -> str:
+        """Return the audio format reported for this provider."""
+        return "mp3"
+
+    def _get_model_workers_limit(self, model: str, credentials: dict) -> int:
+        """Return the worker limit reported for this provider."""
+        return 5
+
+    def get_tts_model_voices(self, model: str, credentials: dict, language: Optional[str] = None) -> list:
+        """
+        Return the preset voices for ``language``.
+
+        The lookup always uses the fixed model family name ``'CosyVoice'``;
+        ``model`` and ``credentials`` are not consulted.
+
+        :param language: language code such as ``'zh-Hans'``; optional
+        :return: list of ``{'name': ..., 'value': ...}`` voices, falling back
+            to the ``'__default'`` list when no language-specific list matches
+        """
+        audio_model_name = 'CosyVoice'
+        for key, voices in self.model_voices.items():
+            # Substring match of the voice-table key against the model family name.
+            if key in audio_model_name:
+                if language and language in voices:
+                    return voices[language]
+                elif 'all' in voices:
+                    return voices['all']
+
+        return self.model_voices['__default']['all']
+
+    def _invoke_sagemaker(self, payload:dict, endpoint:str):
+        """
+        Send ``payload`` as JSON to the SageMaker endpoint and decode the JSON reply.
+
+        :param payload: request body
+        :param endpoint: SageMaker endpoint name
+        :return: the decoded JSON response
+        """
+        response_model = self.sagemaker_client.invoke_endpoint(
+            EndpointName=endpoint,
+            Body=json.dumps(payload),
+            ContentType="application/json",
+        )
+        json_str = response_model['Body'].read().decode('utf8')
+        json_obj = json.loads(json_str)
+        return json_obj
+
+    def _download_audio_chunks(self, presign_url: str, chunk_size: int = 1024):
+        """
+        Download the audio behind ``presign_url`` and yield it in chunks.
+
+        :param presign_url: pre-signed URL returned by the endpoint
+        :param chunk_size: size of each yielded chunk in bytes
+        :return: generator of byte chunks
+        """
+        # A timeout keeps a stalled download from blocking forever, and the
+        # status check prevents an HTTP error body from being streamed as audio.
+        response = requests.get(presign_url, timeout=60)
+        response.raise_for_status()
+        audio_bytes = response.content
+        for start in range(0, len(audio_bytes), chunk_size):
+            yield audio_bytes[start:start + chunk_size]
+
+    def _tts_invoke_streaming(self, model_type:str, payload:dict, sagemaker_endpoint:str) -> Any:
+        """
+        Synthesize the payload's text and stream the audio.
+
+        Text longer than the word limit is split into sentences that are
+        synthesized concurrently; their audio is yielded in sentence order.
+
+        :param model_type: one of the ``TTSModelType`` values
+        :param payload: payload from ``_build_tts_payload``; ``lang_tag`` is
+            removed from it and ``tts_text`` may be rewritten
+        :param sagemaker_endpoint: SageMaker endpoint name
+        :return: generator yielding audio byte chunks
+        :raises InvokeBadRequestError: on any failure while synthesizing or downloading
+        """
+        try:
+            lang_tag = ''
+            if model_type == TTSModelType.CloneVoice_CrossLingual.value:
+                lang_tag = payload.pop('lang_tag')
+
+            word_limit = self._get_model_word_limit(model='', credentials={})
+            content_text = payload.get("tts_text")
+            if len(content_text) > word_limit:
+                split_sentences = self._split_text_into_sentences(content_text, max_length=word_limit)
+                sentences = [f"{lang_tag}{s}" for s in split_sentences if s]
+                if not sentences:
+                    # Nothing to synthesize; also avoids max_workers=0 below.
+                    return
+
+                payloads = [{**copy.deepcopy(payload), "tts_text": sentence} for sentence in sentences]
+
+                # The context manager shuts the pool down once streaming ends.
+                with concurrent.futures.ThreadPoolExecutor(max_workers=min(4, len(sentences))) as executor:
+                    futures = [
+                        executor.submit(
+                            self._invoke_sagemaker,
+                            payload=sentence_payload,
+                            endpoint=sagemaker_endpoint,
+                        )
+                        for sentence_payload in payloads
+                    ]
+
+                    # Consume in submission order so audio stays in sentence order.
+                    for future in futures:
+                        resp = future.result()
+                        yield from self._download_audio_chunks(resp.get('s3_presign_url'))
+            else:
+                # Apply the language tag here as well, matching the split path
+                # (it is the empty string for every other mode).
+                payload["tts_text"] = f"{lang_tag}{content_text}"
+                resp = self._invoke_sagemaker(payload, sagemaker_endpoint)
+                yield from self._download_audio_chunks(resp.get('s3_presign_url'))
+        except Exception as ex:
+            raise InvokeBadRequestError(str(ex)) from ex

+ 6 - 3
api/core/tools/provider/builtin/aws/tools/apply_guardrail.py

@@ -3,6 +3,7 @@ import logging
 from typing import Any, Union
 
 import boto3
+from botocore.exceptions import BotoCoreError
 from pydantic import BaseModel, Field
 
 from core.tools.entities.tool_entities import ToolInvokeMessage
@@ -16,7 +17,7 @@ class GuardrailParameters(BaseModel):
     guardrail_version: str = Field(..., description="The version of the guardrail")
     source: str = Field(..., description="The source of the content")
     text: str = Field(..., description="The text to apply the guardrail to")
-    aws_region: str = Field(default="us-east-1", description="AWS region for the Bedrock client")
+    aws_region: str = Field(..., description="AWS region for the Bedrock client")
 
 class ApplyGuardrailTool(BuiltinTool):
     def _invoke(self,
@@ -40,6 +41,8 @@ class ApplyGuardrailTool(BuiltinTool):
                 source=params.source,
                 content=[{"text": {"text": params.text}}]
             )
+            
+            logger.info(f"Raw response from AWS: {json.dumps(response, indent=2)}")
 
             # Check for empty response
             if not response:
@@ -69,7 +72,7 @@ class ApplyGuardrailTool(BuiltinTool):
 
             return self.create_text_message(text=result)
 
-        except boto3.exceptions.BotoCoreError as e:
+        except BotoCoreError as e:
             error_message = f'AWS service error: {str(e)}'
             logger.error(error_message, exc_info=True)
             return self.create_text_message(text=error_message)
@@ -80,4 +83,4 @@ class ApplyGuardrailTool(BuiltinTool):
         except Exception as e:
             error_message = f'An unexpected error occurred: {str(e)}'
             logger.error(error_message, exc_info=True)
-            return self.create_text_message(text=error_message)
+            return self.create_text_message(text=error_message)

+ 11 - 0
api/core/tools/provider/builtin/aws/tools/apply_guardrail.yaml

@@ -54,3 +54,14 @@ parameters:
       zh_Hans: 用于请求护栏审查的内容,可以是用户输入或 LLM 输出。
     llm_description: The content used for requesting guardrail review, which can be either user input or LLM output.
     form: llm
+  - name: aws_region
+    type: string
+    required: true
+    label:
+      en_US: AWS Region
+      zh_Hans: AWS 区域
+    human_description:
+      en_US: Please enter the AWS region for the Bedrock client, for example 'us-east-1'.
+      zh_Hans: 请输入 Bedrock 客户端的 AWS 区域,例如 'us-east-1'。
+    llm_description: Please enter the AWS region for the Bedrock client, for example 'us-east-1'.
+    form: form

+ 71 - 0
api/core/tools/provider/builtin/aws/tools/lambda_yaml_to_json.py

@@ -0,0 +1,71 @@
+import json
+import logging
+from typing import Any, Union
+
+import boto3
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+# NOTE(review): basicConfig() configures the root logger as a side effect of
+# importing this module — confirm a tool module should own that.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Extra stream handler attached directly to this module's logger.
+# NOTE(review): if the root logger also has a handler, records may be emitted
+# twice through propagation — verify this is intended.
+console_handler = logging.StreamHandler()
+logger.addHandler(console_handler)
+
+
+class LambdaYamlToJsonTool(BuiltinTool):
+    """Builtin tool that converts YAML to JSON by invoking an AWS Lambda function."""
+
+    # boto3 Lambda client; created on the first invocation and reused afterwards.
+    lambda_client: Any = None
+
+    def _invoke_lambda(self, lambda_name: str, yaml_content: str) -> str:
+        """
+        Invoke the Lambda function synchronously and return the ``body`` of its reply.
+
+        :param lambda_name: name of the Lambda function
+        :param yaml_content: YAML text, sent as the ``body`` of the request
+        :return: the ``body`` field of the Lambda response
+        :raises Exception: when the response is not a JSON object with ``statusCode`` 200
+        """
+        request_body = json.dumps({"body": yaml_content})
+        logger.info(request_body)
+
+        invoke_response = self.lambda_client.invoke(
+            FunctionName=lambda_name,
+            InvocationType='RequestResponse',
+            Payload=request_body,
+        )
+        response_str = invoke_response['Payload'].read().decode("utf-8")
+        resp_json = json.loads(response_str)
+
+        logger.info(resp_json)
+        # A response without ``statusCode`` (or one that is not a JSON object)
+        # is reported with the raw response instead of an opaque KeyError.
+        if not isinstance(resp_json, dict) or resp_json.get('statusCode') != 200:
+            raise Exception(f"Invalid status code: {response_str}")
+
+        return resp_json['body']
+
+    def _invoke(self,
+                user_id: str,
+                tool_parameters: dict[str, Any],
+        ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        """
+        Invoke the tool.
+
+        :param user_id: id of the calling user (unused)
+        :param tool_parameters: expects ``yaml_content`` and ``lambda_name``;
+            ``aws_region`` is optional
+        :return: text message with the conversion result, a prompt for a
+            missing parameter, or the error description
+        """
+        try:
+            if not self.lambda_client:
+                aws_region = tool_parameters.get('aws_region') # todo: move aws_region out, and update client region
+                if aws_region:
+                    self.lambda_client = boto3.client("lambda", region_name=aws_region)
+                else:
+                    self.lambda_client = boto3.client("lambda")
+
+            yaml_content = tool_parameters.get('yaml_content', '')
+            if not yaml_content:
+                return self.create_text_message('Please input yaml_content')
+
+            lambda_name = tool_parameters.get('lambda_name', '')
+            if not lambda_name:
+                return self.create_text_message('Please input lambda_name')
+            logger.debug(f'{json.dumps(tool_parameters, indent=2, ensure_ascii=False)}')
+
+            result = self._invoke_lambda(lambda_name, yaml_content)
+            logger.debug(result)
+
+            return self.create_text_message(result)
+        except Exception as e:
+            return self.create_text_message(f'Exception: {str(e)}')
+        finally:
+            # The flush used to sit after the try/except, where it could never
+            # run because every path above returns.
+            console_handler.flush()

+ 53 - 0
api/core/tools/provider/builtin/aws/tools/lambda_yaml_to_json.yaml

@@ -0,0 +1,53 @@
+identity:
+  name: lambda_yaml_to_json
+  author: AWS
+  label:
+    en_US: LambdaYamlToJson
+    zh_Hans: LambdaYamlToJson
+    pt_BR: LambdaYamlToJson
+  icon: icon.svg
+description:
+  human:
+    en_US: A tool to convert yaml to json using AWS Lambda.
+    zh_Hans: 将 YAML 转为 JSON 的工具(通过AWS Lambda)。
+    pt_BR: A tool to convert yaml to json using AWS Lambda.
+  llm: A tool to convert yaml to json.
+parameters:
+  - name: yaml_content
+    type: string
+    required: true
+    label:
+      en_US: YAML content to convert to JSON
+      zh_Hans: YAML 内容
+      pt_BR: YAML content to convert to JSON
+    human_description:
+      en_US: YAML content to convert to JSON
+      zh_Hans: YAML 内容
+      pt_BR: YAML content to convert to JSON
+    llm_description: YAML content to convert to JSON
+    form: llm
+  - name: aws_region
+    type: string
+    required: false
+    label:
+      en_US: region of lambda
+      zh_Hans: Lambda 所在的region
+      pt_BR: region of lambda
+    human_description:
+      en_US: region of lambda
+      zh_Hans: Lambda 所在的region
+      pt_BR: region of lambda
+    llm_description: region of lambda
+    form: form
+  - name: lambda_name
+    type: string
+    required: false
+    label:
+      en_US: name of lambda
+      zh_Hans: Lambda 名称
+      pt_BR: name of lambda
+    human_description:
+      en_US: name of lambda
+      zh_Hans: Lambda 名称
+      pt_BR: name of lambda
+    form: form

+ 2 - 4
api/core/tools/provider/builtin/aws/tools/sagemaker_text_rerank.py

@@ -78,9 +78,7 @@ class SageMakerReRankTool(BuiltinTool):
             sorted_candidate_docs = sorted(candidate_docs, key=lambda x: x['score'], reverse=True)
 
             line = 9
-            results_str = json.dumps(sorted_candidate_docs[:self.topk], ensure_ascii=False)
-            return self.create_text_message(text=results_str)
+            return [ self.create_json_message(res) for res in sorted_candidate_docs[:self.topk] ]
             
         except Exception as e:
-            return self.create_text_message(f'Exception {str(e)}, line : {line}')
-    
+            return self.create_text_message(f'Exception {str(e)}, line : {line}')

+ 95 - 0
api/core/tools/provider/builtin/aws/tools/sagemaker_tts.py

@@ -0,0 +1,95 @@
+import json
+from enum import Enum
+from typing import Any, Union
+
+import boto3
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class TTSModelType(Enum):
+    # Synthesis modes understood by the payload builder; the string values are
+    # what is compared against the ``tts_infer_type`` tool parameter.
+
+    # Built-in voice: requires a voice/role name.
+    PresetVoice = "PresetVoice"
+    # Voice cloning: requires both a prompt text and a prompt audio.
+    CloneVoice = "CloneVoice"
+    # Cross-lingual cloning: requires a prompt audio; a language tag is detected for the text.
+    CloneVoice_CrossLingual = "CloneVoice_CrossLingual"
+    # Instructed voice: requires an instruct text and a voice/role name.
+    InstructVoice = "InstructVoice"
+
+class SageMakerTTSTool(BuiltinTool):
+    """Builtin tool that synthesizes speech through a SageMaker endpoint."""
+
+    # boto3 clients and the endpoint name; set on the first invocation and
+    # reused afterwards.
+    sagemaker_client: Any = None
+    sagemaker_endpoint: Union[str, None] = None
+    s3_client: Any = None
+    comprehend_client: Any = None
+
+    def _detect_lang_code(self, content: str, map_dict: Union[dict, None] = None):
+        """
+        Detect the dominant language of ``content`` and return its language tag.
+
+        :param content: text whose language is detected via Amazon Comprehend
+        :param map_dict: optional mapping from Comprehend language code to tag;
+            the built-in mapping is used when omitted
+        :return: the mapped tag, or ``'<|zh|>'`` when the language is unknown
+            or nothing was detected
+        """
+        # Previously the argument was overwritten unconditionally, so a
+        # caller-supplied mapping was silently ignored.
+        if map_dict is None:
+            map_dict = {
+                "zh" : "<|zh|>",
+                "en" : "<|en|>",
+                "ja" : "<|jp|>",
+                "zh-TW" : "<|yue|>",
+                "ko" : "<|ko|>"
+            }
+
+        response = self.comprehend_client.detect_dominant_language(Text=content)
+        languages = response.get('Languages') or []
+        if not languages:
+            # Nothing detected: fall back to the same default as an unmapped code.
+            return '<|zh|>'
+        return map_dict.get(languages[0]['LanguageCode'], '<|zh|>')
+
+    def _build_tts_payload(self, model_type:str, content_text:str, model_role:str, prompt_text:str, prompt_audio:str, instruct_text:str):
+        """
+        Build the endpoint payload for the given synthesis mode.
+
+        :param model_type: one of the ``TTSModelType`` values
+        :param content_text: text to synthesize
+        :param model_role: voice/role name (PresetVoice, InstructVoice)
+        :param prompt_text: transcript of the prompt audio (CloneVoice)
+        :param prompt_audio: prompt audio reference (CloneVoice, CloneVoice_CrossLingual)
+        :param instruct_text: voice instruction (InstructVoice)
+        :return: payload dict for the endpoint
+        :raises RuntimeError: when the mode is unknown or a required value is missing
+        """
+        if model_type == TTSModelType.PresetVoice.value and model_role:
+            return { "tts_text" : content_text, "role" : model_role }
+        if model_type == TTSModelType.CloneVoice.value and prompt_text and prompt_audio:
+            return { "tts_text" : content_text, "prompt_text": prompt_text, "prompt_audio" : prompt_audio }
+        if model_type ==  TTSModelType.CloneVoice_CrossLingual.value and prompt_audio:
+            lang_tag = self._detect_lang_code(content_text)
+            return { "tts_text" : f"{content_text}", "prompt_audio" : prompt_audio, "lang_tag" : lang_tag }
+        if model_type ==  TTSModelType.InstructVoice.value and instruct_text and model_role:
+            return { "tts_text" : content_text, "role" : model_role, "instruct_text" : instruct_text }
+
+        raise RuntimeError(f"Invalid params for {model_type}")
+
+    def _invoke_sagemaker(self, payload:dict, endpoint:str):
+        """
+        Send ``payload`` as JSON to the SageMaker endpoint and decode the JSON reply.
+
+        :param payload: request body
+        :param endpoint: SageMaker endpoint name
+        :return: the decoded JSON response
+        """
+        response_model = self.sagemaker_client.invoke_endpoint(
+            EndpointName=endpoint,
+            Body=json.dumps(payload),
+            ContentType="application/json",
+        )
+        json_str = response_model['Body'].read().decode('utf8')
+        json_obj = json.loads(json_str)
+        return json_obj
+
+    def _invoke(self,
+                user_id: str,
+                tool_parameters: dict[str, Any],
+        ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        """
+        Invoke the tool.
+
+        :param user_id: id of the calling user (unused)
+        :param tool_parameters: tool parameters as declared in the tool manifest
+        :return: text message carrying the pre-signed URL of the synthesized
+            audio, or the error description
+        """
+        try:
+            if not self.sagemaker_client:
+                aws_region = tool_parameters.get('aws_region')
+                # Without a region boto3 resolves it from its own configuration.
+                client_kwargs = {'region_name': aws_region} if aws_region else {}
+                self.sagemaker_client = boto3.client("sagemaker-runtime", **client_kwargs)
+                self.s3_client = boto3.client("s3", **client_kwargs)
+                self.comprehend_client = boto3.client('comprehend', **client_kwargs)
+
+            if not self.sagemaker_endpoint:
+                self.sagemaker_endpoint = tool_parameters.get('sagemaker_endpoint')
+
+            tts_text = tool_parameters.get('tts_text')
+            # The infer type is optional in the manifest; without a default a
+            # missing value could never match any mode.
+            tts_infer_type = tool_parameters.get('tts_infer_type') or TTSModelType.PresetVoice.value
+
+            voice = tool_parameters.get('voice')
+            mock_voice_audio = tool_parameters.get('mock_voice_audio')
+            mock_voice_text = tool_parameters.get('mock_voice_text')
+            voice_instruct_prompt = tool_parameters.get('voice_instruct_prompt')
+            payload = self._build_tts_payload(tts_infer_type, tts_text, voice, mock_voice_text, mock_voice_audio, voice_instruct_prompt)
+
+            result = self._invoke_sagemaker(payload, self.sagemaker_endpoint)
+
+            return self.create_text_message(text=result['s3_presign_url'])
+
+        except Exception as e:
+            return self.create_text_message(f'Exception {str(e)}')

+ 149 - 0
api/core/tools/provider/builtin/aws/tools/sagemaker_tts.yaml

@@ -0,0 +1,149 @@
+identity:
+  name: sagemaker_tts
+  author: AWS
+  label:
+    en_US: SagemakerTTS
+    zh_Hans: Sagemaker语音合成
+    pt_BR: SagemakerTTS
+  icon: icon.svg
+description:
+  human:
+    en_US: A tool for Speech synthesis - https://github.com/aws-samples/dify-aws-tool
+    zh_Hans: Sagemaker语音合成工具, 请参考 Github Repo - https://github.com/aws-samples/dify-aws-tool上的部署脚本
+    pt_BR: A tool for Speech synthesis.
+  llm: A tool for Speech synthesis. You can find deploy notebook on Github Repo - https://github.com/aws-samples/dify-aws-tool
+parameters:
+  - name: sagemaker_endpoint
+    type: string
+    required: true
+    label:
+      en_US: sagemaker endpoint for tts
+      zh_Hans: 语音生成的SageMaker端点
+      pt_BR: sagemaker endpoint for tts
+    human_description:
+      en_US: sagemaker endpoint for tts
+      zh_Hans: 语音生成的SageMaker端点
+      pt_BR: sagemaker endpoint for tts
+    llm_description: sagemaker endpoint for tts
+    form: form
+  - name: tts_text
+    type: string
+    required: true
+    label:
+      en_US: tts text
+      zh_Hans: 语音合成原文
+      pt_BR: tts text
+    human_description:
+      en_US: tts text
+      zh_Hans: 语音合成原文
+      pt_BR: tts text
+    llm_description: tts text
+    form: llm
+  - name: tts_infer_type
+    type: select
+    required: false
+    label:
+      en_US: tts infer type
+      zh_Hans: 合成方式
+      pt_BR: tts infer type
+    human_description:
+      en_US: tts infer type
+      zh_Hans: 合成方式
+      pt_BR: tts infer type
+    llm_description: tts infer type
+    options:
+      - value: PresetVoice
+        label:
+          en_US: preset voice
+          zh_Hans: 预置音色
+      - value: CloneVoice
+        label:
+          en_US: clone voice
+          zh_Hans: 克隆音色
+      - value: CloneVoice_CrossLingual
+        label:
+          en_US: clone crossLingual voice
+          zh_Hans: 克隆音色(跨语言)
+      - value: InstructVoice
+        label:
+          en_US: instruct voice
+          zh_Hans: 指令音色
+    form: form
+  - name: voice
+    type: select
+    required: false
+    label:
+      en_US: preset voice
+      zh_Hans: 预置音色
+      pt_BR: preset voice
+    human_description:
+      en_US: preset voice
+      zh_Hans: 预置音色
+      pt_BR: preset voice
+    llm_description: preset voice
+    options:
+      - value: 中文男
+        label:
+          en_US: zh-cn male
+          zh_Hans: 中文男
+      - value: 中文女
+        label:
+          en_US: zh-cn female
+          zh_Hans: 中文女
+      - value: 粤语女
+        label:
+          en_US: Cantonese female
+          zh_Hans: 粤语女
+    form: form
+  - name: mock_voice_audio
+    type: string
+    required: false
+    label:
+      en_US: clone voice link
+      zh_Hans: 克隆音频链接
+      pt_BR: clone voice link
+    human_description:
+      en_US: clone voice link
+      zh_Hans: 克隆音频链接
+      pt_BR: clone voice link
+    llm_description: clone voice link
+    form: llm
+  - name: mock_voice_text
+    type: string
+    required: false
+    label:
+      en_US: text of clone voice
+      zh_Hans: 克隆音频对应文本
+      pt_BR: text of clone voice
+    human_description:
+      en_US: text of clone voice
+      zh_Hans: 克隆音频对应文本
+      pt_BR: text of clone voice
+    llm_description: text of clone voice
+    form: llm
+  - name: voice_instruct_prompt
+    type: string
+    required: false
+    label:
+      en_US: instruct prompt for voice
+      zh_Hans: 音色指令文本
+      pt_BR: instruct prompt for voice
+    human_description:
+      en_US: instruct prompt for voice
+      zh_Hans: 音色指令文本
+      pt_BR: instruct prompt for voice
+    llm_description: instruct prompt for voice
+    form: llm
+  - name: aws_region
+    type: string
+    required: false
+    label:
+      en_US: region of sagemaker endpoint
+      zh_Hans: SageMaker 端点所在的region
+      pt_BR: region of sagemaker endpoint
+    human_description:
+      en_US: region of sagemaker endpoint
+      zh_Hans: SageMaker 端点所在的region
+      pt_BR: region of sagemaker endpoint
+    llm_description: region of sagemaker endpoint
+    form: form

Різницю між файлами не показано, бо вона завелика
+ 260 - 15
api/poetry.lock


+ 1 - 0
api/pyproject.toml

@@ -113,6 +113,7 @@ azure-identity = "1.16.1"
 azure-storage-blob = "12.13.0"
 beautifulsoup4 = "4.12.2"
 boto3 = "1.34.148"
+sagemaker = "2.231.0"
 bs4 = "~0.0.1"
 cachetools = "~5.3.0"
 celery = "~5.3.6"