
add new provider Solar (#6884)

JuHyung Son 8 months ago
parent
commit
2e941bb91c
22 changed files with 1328 additions and 2 deletions
  1. + 1 - 0  api/core/model_runtime/model_providers/_position.yaml
  2. + 0 - 0  api/core/model_runtime/model_providers/upstage/__init__.py
  3. + 14 - 0  api/core/model_runtime/model_providers/upstage/_assets/icon_l_en.svg
  4. + 3 - 0  api/core/model_runtime/model_providers/upstage/_assets/icon_s_en.svg
  5. + 57 - 0  api/core/model_runtime/model_providers/upstage/_common.py
  6. + 0 - 0  api/core/model_runtime/model_providers/upstage/llm/__init__.py
  7. + 1 - 0  api/core/model_runtime/model_providers/upstage/llm/_position.yaml
  8. + 575 - 0  api/core/model_runtime/model_providers/upstage/llm/llm.py
  9. + 43 - 0  api/core/model_runtime/model_providers/upstage/llm/solar-1-mini-chat.yaml
  10. + 0 - 0  api/core/model_runtime/model_providers/upstage/text_embedding/__init__.py
  11. + 9 - 0  api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-passage.yaml
  12. + 9 - 0  api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-query.yaml
  13. + 195 - 0  api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
  14. + 32 - 0  api/core/model_runtime/model_providers/upstage/upstage.py
  15. + 49 - 0  api/core/model_runtime/model_providers/upstage/upstage.yaml
  16. + 1 - 1  api/docker/entrypoint.sh
  17. + 1 - 0  api/pyproject.toml
  18. + 0 - 0  api/tests/integration_tests/model_runtime/upstage/__init__.py
  19. + 245 - 0  api/tests/integration_tests/model_runtime/upstage/test_llm.py
  20. + 23 - 0  api/tests/integration_tests/model_runtime/upstage/test_provider.py
  21. + 67 - 0  api/tests/integration_tests/model_runtime/upstage/test_text_embedding.py
  22. + 3 - 1  dev/pytest/pytest_model_runtime.sh

+ 1 - 0
api/core/model_runtime/model_providers/_position.yaml

@@ -6,6 +6,7 @@
 - nvidia
 - nvidia_nim
 - cohere
+- upstage
 - bedrock
 - togetherai
 - openrouter

+ 0 - 0
api/core/model_runtime/model_providers/upstage/__init__.py


File diff suppressed because it is too large
+ 14 - 0
api/core/model_runtime/model_providers/upstage/_assets/icon_l_en.svg


File diff suppressed because it is too large
+ 3 - 0
api/core/model_runtime/model_providers/upstage/_assets/icon_s_en.svg


+ 57 - 0
api/core/model_runtime/model_providers/upstage/_common.py

@@ -0,0 +1,57 @@
+
+from collections.abc import Mapping
+
+import openai
+from httpx import Timeout
+
+from core.model_runtime.errors.invoke import (
+    InvokeAuthorizationError,
+    InvokeBadRequestError,
+    InvokeConnectionError,
+    InvokeError,
+    InvokeRateLimitError,
+    InvokeServerUnavailableError,
+)
+
+
+class _CommonUpstage:
+    def _to_credential_kwargs(self, credentials: Mapping) -> dict:
+        """
+        Transform credentials to kwargs for model instance
+
+        :param credentials:
+        :return: credential kwargs for the OpenAI client
+        """
+        credentials_kwargs = {
+            "api_key": credentials['upstage_api_key'],
+            "base_url": "https://api.upstage.ai/v1/solar",
+            "timeout": Timeout(315.0, read=300.0, write=20.0, connect=10.0),
+            "max_retries": 1
+        }
+
+        return credentials_kwargs
+
+    @property
+    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+        """
+        Map model invoke error to unified error
+        The key is the error type thrown to the caller
+        The value is the error type thrown by the model,
+        which needs to be converted into a unified error type for the caller.
+
+        :return: Invoke error mapping
+        """
+        return {
+            InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError],
+            InvokeServerUnavailableError: [openai.InternalServerError],
+            InvokeRateLimitError: [openai.RateLimitError],
+            InvokeAuthorizationError: [openai.AuthenticationError, openai.PermissionDeniedError],
+            InvokeBadRequestError: [
+                openai.BadRequestError,
+                openai.NotFoundError,
+                openai.UnprocessableEntityError,
+                openai.APIError,
+            ],
+        }
+        
+
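The credential kwargs built above are consumed directly by the OpenAI SDK, pointed at Upstage's OpenAI-compatible Solar endpoint. A minimal sketch of how a caller might wire them together (the API key value is a placeholder, not a real credential):

```python
from openai import OpenAI

from core.model_runtime.model_providers.upstage._common import _CommonUpstage

# Placeholder credentials; the key name matches the provider schema in upstage.yaml.
credentials = {"upstage_api_key": "up-xxxxxxxxxxxxxxxxxxxx"}

common = _CommonUpstage()
client = OpenAI(**common._to_credential_kwargs(credentials))

# The client now talks to https://api.upstage.ai/v1/solar with the configured
# timeouts and a single retry, so chat and embedding calls hit Solar models.
response = client.chat.completions.create(
    model="solar-1-mini-chat",
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=10,
)
print(response.choices[0].message.content)
```

The `_invoke_error_mapping` property is what lets the runtime translate the SDK's exception classes (rate limits, auth failures, and so on) into the unified `InvokeError` hierarchy.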

+ 0 - 0
api/core/model_runtime/model_providers/upstage/llm/__init__.py


+ 1 - 0
api/core/model_runtime/model_providers/upstage/llm/_position.yaml

@@ -0,0 +1 @@
+- solar-1-mini-chat

+ 575 - 0
api/core/model_runtime/model_providers/upstage/llm/llm.py

@@ -0,0 +1,575 @@
+import logging
+from collections.abc import Generator
+from typing import Optional, Union, cast
+
+from openai import OpenAI, Stream
+from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageToolCall
+from openai.types.chat.chat_completion_chunk import ChoiceDeltaFunctionCall, ChoiceDeltaToolCall
+from openai.types.chat.chat_completion_message import FunctionCall
+from tokenizers import Tokenizer
+
+from core.model_runtime.callbacks.base_callback import Callback
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    ImagePromptMessageContent,
+    PromptMessage,
+    PromptMessageContentType,
+    PromptMessageTool,
+    SystemPromptMessage,
+    TextPromptMessageContent,
+    ToolPromptMessage,
+    UserPromptMessage,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
+from core.model_runtime.model_providers.upstage._common import _CommonUpstage
+
+logger = logging.getLogger(__name__)
+
+UPSTAGE_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
+You can find the structure of the {{block}} object in the instructions; use {"answer": "$your_answer"} as the default structure
+if you are not sure about it.
+
+<instructions>
+{{instructions}}
+</instructions>
+"""
+
+class UpstageLargeLanguageModel(_CommonUpstage, LargeLanguageModel):
+    """
+    Model class for Upstage large language model. 
+    """
+
+    def _invoke(self, model: str, credentials: dict,
+                prompt_messages: list[PromptMessage], model_parameters: dict,
+                tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+                stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
+        """
+        Invoke large language model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param prompt_messages: prompt messages
+        :param model_parameters: model parameters
+        :param tools: tools for tool calling
+        :param stop: stop words
+        :param stream: is stream response
+        :param user: unique user id
+        :return: full response or stream response chunk generator result
+        """
+
+        return self._chat_generate(
+            model=model,
+            credentials=credentials,
+            prompt_messages=prompt_messages,
+            model_parameters=model_parameters,
+            tools=tools,
+            stop=stop,
+            stream=stream,
+            user=user
+        )
+
+    def _code_block_mode_wrapper(self, model: str, credentials: dict,
+                                 prompt_messages: list[PromptMessage], model_parameters: dict,
+                                 tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+                                 stream: bool = True, user: Optional[str] = None,
+                                 callbacks: Optional[list[Callback]] = None) -> Union[LLMResult, Generator]:
+        """
+        Code block mode wrapper for invoking large language model
+        """
+        if 'response_format' in model_parameters and model_parameters['response_format'] in ['JSON', 'XML']:
+            stop = stop or []
+            self._transform_chat_json_prompts(
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                model_parameters=model_parameters,
+                tools=tools,
+                stop=stop,
+                stream=stream,
+                user=user,
+                response_format=model_parameters['response_format']
+            )
+            model_parameters.pop('response_format')
+
+            return self._invoke(
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                model_parameters=model_parameters,
+                tools=tools,
+                stop=stop,
+                stream=stream,
+                user=user
+            )
+
+    def _transform_chat_json_prompts(self, model: str, credentials: dict,
+                                     prompt_messages: list[PromptMessage], model_parameters: dict,
+                                     tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None,
+                                     stream: bool = True, user: str | None = None, response_format: str = 'JSON') -> None:
+        """
+        Transform prompt messages for JSON/XML block mode output
+        """
+        if stop is None:
+            stop = []
+        if "```\n" not in stop:
+            stop.append("```\n")
+        if "\n```" not in stop:
+            stop.append("\n```")
+
+        if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
+            prompt_messages[0] = SystemPromptMessage(
+                content=UPSTAGE_BLOCK_MODE_PROMPT
+                    .replace("{{instructions}}", prompt_messages[0].content)
+                    .replace("{{block}}", response_format)
+            )
+            prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}\n"))
+        else:
+            prompt_messages.insert(0, SystemPromptMessage(
+                                   content=UPSTAGE_BLOCK_MODE_PROMPT
+                                       .replace("{{instructions}}", f"Please output a valid {response_format} object.")
+                                       .replace("{{block}}", response_format)
+            ))
+            prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}"))
+
+    def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], tools: Optional[list[PromptMessageTool]] = None) -> int:
+        """
+        Get number of tokens for given prompt messages
+
+        :param model: model name
+        :param credentials: model credentials
+        :param prompt_messages: prompt messages
+        :param tools: tools for tool calling
+        :return:
+        """
+        return self._num_tokens_from_messages(model, prompt_messages, tools)
+
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        """
+        Validate model credentials
+
+        :param model: model name
+        :param credentials: model credentials
+        :return:
+        """
+        try:
+            credentials_kwargs = self._to_credential_kwargs(credentials)
+            client = OpenAI(**credentials_kwargs)
+
+            client.chat.completions.create(
+                messages=[{"role": "user", "content": "ping"}],
+                model=model,
+                temperature=0,
+                max_tokens=10,
+                stream=False
+            )
+        except Exception as e:
+            raise CredentialsValidateFailedError(str(e))
+
+    def _chat_generate(self, model: str, credentials: dict,
+                       prompt_messages: list[PromptMessage], model_parameters: dict,
+                       tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+                       stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
+        credentials_kwargs = self._to_credential_kwargs(credentials)
+        client = OpenAI(**credentials_kwargs)
+
+        extra_model_kwargs = {}
+
+        if tools:
+            extra_model_kwargs["functions"] = [{
+                "name": tool.name,
+                "description": tool.description,
+                "parameters": tool.parameters
+            } for tool in tools]
+
+        if stop:
+            extra_model_kwargs["stop"] = stop
+
+        if user:
+            extra_model_kwargs["user"] = user
+
+        # chat model
+        response = client.chat.completions.create(
+            messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages],
+            model=model,
+            stream=stream,
+            **model_parameters,
+            **extra_model_kwargs,
+        )
+
+        if stream:
+            return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, tools)
+        return self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools)
+        
+    def _handle_chat_generate_response(self, model: str, credentials: dict, response: ChatCompletion,
+                                       prompt_messages: list[PromptMessage],
+                                       tools: Optional[list[PromptMessageTool]] = None) -> LLMResult:
+        """
+        Handle llm chat response
+
+        :param model: model name
+        :param credentials: credentials
+        :param response: response
+        :param prompt_messages: prompt messages
+        :param tools: tools for tool calling
+        :return: llm response
+        """
+        assistant_message = response.choices[0].message
+        # assistant_message_tool_calls = assistant_message.tool_calls
+        assistant_message_function_call = assistant_message.function_call
+
+        # extract tool calls from response
+        # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
+        function_call = self._extract_response_function_call(assistant_message_function_call)
+        tool_calls = [function_call] if function_call else []
+
+        # transform assistant message to prompt message
+        assistant_prompt_message = AssistantPromptMessage(
+            content=assistant_message.content,
+            tool_calls=tool_calls
+        )
+
+        # calculate num tokens
+        if response.usage:
+            # transform usage
+            prompt_tokens = response.usage.prompt_tokens
+            completion_tokens = response.usage.completion_tokens
+        else:
+            # calculate num tokens
+            prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools)
+            completion_tokens = self._num_tokens_from_messages(model, [assistant_prompt_message])
+
+        # transform usage
+        usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+
+        # transform response
+        response = LLMResult(
+            model=response.model,
+            prompt_messages=prompt_messages,
+            message=assistant_prompt_message,
+            usage=usage,
+            system_fingerprint=response.system_fingerprint,
+        )
+
+        return response
+
+    def _handle_chat_generate_stream_response(self, model: str, credentials: dict, response: Stream[ChatCompletionChunk],
+                                              prompt_messages: list[PromptMessage],
+                                              tools: Optional[list[PromptMessageTool]] = None) -> Generator:
+        """
+        Handle llm chat stream response
+
+        :param model: model name
+        :param credentials: model credentials
+        :param response: response
+        :param prompt_messages: prompt messages
+        :param tools: tools for tool calling
+        :return: llm response chunk generator
+        """
+        full_assistant_content = ''
+        delta_assistant_message_function_call_storage: Optional[ChoiceDeltaFunctionCall] = None
+        prompt_tokens = 0
+        completion_tokens = 0
+        final_tool_calls = []
+        final_chunk = LLMResultChunk(
+            model=model,
+            prompt_messages=prompt_messages,
+            delta=LLMResultChunkDelta(
+                index=0,
+                message=AssistantPromptMessage(content=''),
+            )
+        )
+
+        for chunk in response:
+            if len(chunk.choices) == 0:
+                if chunk.usage:
+                    # calculate num tokens
+                    prompt_tokens = chunk.usage.prompt_tokens
+                    completion_tokens = chunk.usage.completion_tokens
+                continue
+
+            delta = chunk.choices[0]
+            has_finish_reason = delta.finish_reason is not None
+
+            if not has_finish_reason and (delta.delta.content is None or delta.delta.content == '') and \
+                delta.delta.function_call is None:
+                continue
+
+            # assistant_message_tool_calls = delta.delta.tool_calls
+            assistant_message_function_call = delta.delta.function_call
+
+            # extract tool calls from response
+            if delta_assistant_message_function_call_storage is not None:
+                # handle process of stream function call
+                if assistant_message_function_call:
+                    # message has not ended yet
+                    delta_assistant_message_function_call_storage.arguments += assistant_message_function_call.arguments
+                    continue
+                else:
+                    # message has ended
+                    assistant_message_function_call = delta_assistant_message_function_call_storage
+                    delta_assistant_message_function_call_storage = None
+            else:
+                if assistant_message_function_call:
+                    # start of stream function call
+                    delta_assistant_message_function_call_storage = assistant_message_function_call
+                    if delta_assistant_message_function_call_storage.arguments is None:
+                        delta_assistant_message_function_call_storage.arguments = ''
+                    if not has_finish_reason:
+                        continue
+
+            # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
+            function_call = self._extract_response_function_call(assistant_message_function_call)
+            tool_calls = [function_call] if function_call else []
+            if tool_calls:
+                final_tool_calls.extend(tool_calls)
+
+            # transform assistant message to prompt message
+            assistant_prompt_message = AssistantPromptMessage(
+                content=delta.delta.content if delta.delta.content else '',
+                tool_calls=tool_calls
+            )
+
+            full_assistant_content += delta.delta.content if delta.delta.content else ''
+
+            if has_finish_reason:
+                final_chunk = LLMResultChunk(
+                    model=chunk.model,
+                    prompt_messages=prompt_messages,
+                    system_fingerprint=chunk.system_fingerprint,
+                    delta=LLMResultChunkDelta(
+                        index=delta.index,
+                        message=assistant_prompt_message,
+                        finish_reason=delta.finish_reason,
+                    )
+                )
+            else:
+                yield LLMResultChunk(
+                    model=chunk.model,
+                    prompt_messages=prompt_messages,
+                    system_fingerprint=chunk.system_fingerprint,
+                    delta=LLMResultChunkDelta(
+                        index=delta.index,
+                        message=assistant_prompt_message,
+                    )
+                )
+
+        if not prompt_tokens:
+            prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools)
+
+        if not completion_tokens:
+            full_assistant_prompt_message = AssistantPromptMessage(
+                content=full_assistant_content,
+                tool_calls=final_tool_calls
+            )
+            completion_tokens = self._num_tokens_from_messages(model, [full_assistant_prompt_message])
+
+        # transform usage
+        usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+        final_chunk.delta.usage = usage
+
+        yield final_chunk
+
+    def _extract_response_tool_calls(self,
+                                     response_tool_calls: list[ChatCompletionMessageToolCall | ChoiceDeltaToolCall]) \
+            -> list[AssistantPromptMessage.ToolCall]:
+        """
+        Extract tool calls from response
+
+        :param response_tool_calls: response tool calls
+        :return: list of tool calls
+        """
+        tool_calls = []
+        if response_tool_calls:
+            for response_tool_call in response_tool_calls:
+                function = AssistantPromptMessage.ToolCall.ToolCallFunction(
+                    name=response_tool_call.function.name,
+                    arguments=response_tool_call.function.arguments
+                )
+
+                tool_call = AssistantPromptMessage.ToolCall(
+                    id=response_tool_call.id,
+                    type=response_tool_call.type,
+                    function=function
+                )
+                tool_calls.append(tool_call)
+
+        return tool_calls
+
+    def _extract_response_function_call(self, response_function_call: FunctionCall | ChoiceDeltaFunctionCall) \
+            -> AssistantPromptMessage.ToolCall:
+        """
+        Extract function call from response
+
+        :param response_function_call: response function call
+        :return: tool call
+        """
+        tool_call = None
+        if response_function_call:
+            function = AssistantPromptMessage.ToolCall.ToolCallFunction(
+                name=response_function_call.name,
+                arguments=response_function_call.arguments
+            )
+
+            tool_call = AssistantPromptMessage.ToolCall(
+                id=response_function_call.name,
+                type="function",
+                function=function
+            )
+
+        return tool_call
+
+    def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict:
+        """
+        Convert PromptMessage to dict for Upstage API
+        """
+        if isinstance(message, UserPromptMessage):
+            message = cast(UserPromptMessage, message)
+            if isinstance(message.content, str):
+                message_dict = {"role": "user", "content": message.content}
+            else:
+                sub_messages = []
+                for message_content in message.content:
+                    if message_content.type == PromptMessageContentType.TEXT:
+                        message_content = cast(TextPromptMessageContent, message_content)
+                        sub_message_dict = {
+                            "type": "text",
+                            "text": message_content.data
+                        }
+                        sub_messages.append(sub_message_dict)
+                    elif message_content.type == PromptMessageContentType.IMAGE:
+                        message_content = cast(ImagePromptMessageContent, message_content)
+                        sub_message_dict = {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": message_content.data,
+                                "detail": message_content.detail.value
+                            }
+                        }
+                        sub_messages.append(sub_message_dict)
+
+                message_dict = {"role": "user", "content": sub_messages}
+        elif isinstance(message, AssistantPromptMessage):
+            message = cast(AssistantPromptMessage, message)
+            message_dict = {"role": "assistant", "content": message.content}
+            if message.tool_calls:
+                # message_dict["tool_calls"] = [tool_call.dict() for tool_call in
+                #                               message.tool_calls]
+                function_call = message.tool_calls[0]
+                message_dict["function_call"] = {
+                    "name": function_call.function.name,
+                    "arguments": function_call.function.arguments,
+                }
+        elif isinstance(message, SystemPromptMessage):
+            message = cast(SystemPromptMessage, message)
+            message_dict = {"role": "system", "content": message.content}
+        elif isinstance(message, ToolPromptMessage):
+            message = cast(ToolPromptMessage, message)
+            # message_dict = {
+            #     "role": "tool",
+            #     "content": message.content,
+            #     "tool_call_id": message.tool_call_id
+            # }
+            message_dict = {
+                "role": "function",
+                "content": message.content,
+                "name": message.tool_call_id
+            }
+        else:
+            raise ValueError(f"Got unknown type {message}")
+
+        if message.name:
+            message_dict["name"] = message.name
+
+        return message_dict
+
+    def _get_tokenizer(self) -> Tokenizer:
+        return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
+
+    def _num_tokens_from_messages(self, model: str, messages: list[PromptMessage],
+                                  tools: Optional[list[PromptMessageTool]] = None) -> int:
+        """
+        Calculate num tokens for Solar with the Hugging Face Solar tokenizer.
+        The Solar tokenizer is published on Hugging Face: https://huggingface.co/upstage/solar-1-mini-tokenizer
+        """
+        tokenizer = self._get_tokenizer()
+        tokens_per_message = 5 # <|im_start|>{role}\n{message}<|im_end|>
+        tokens_prefix = 1 # <|startoftext|>
+        tokens_suffix = 3 # <|im_start|>assistant\n
+
+        num_tokens = 0
+        num_tokens += tokens_prefix
+
+        messages_dict = [self._convert_prompt_message_to_dict(message) for message in messages]
+        for message in messages_dict:
+            num_tokens += tokens_per_message
+            for key, value in message.items():
+                if isinstance(value, list):
+                    text = ''
+                    for item in value:
+                        if isinstance(item, dict) and item['type'] == 'text':
+                            text += item['text']
+                    value = text
+
+                if key == "tool_calls":
+                    for tool_call in value:
+                        for t_key, t_value in tool_call.items():
+                            num_tokens += len(tokenizer.encode(t_key, add_special_tokens=False))
+                            if t_key == "function":
+                                for f_key, f_value in t_value.items():
+                                    num_tokens += len(tokenizer.encode(f_key, add_special_tokens=False))
+                                    num_tokens += len(tokenizer.encode(f_value, add_special_tokens=False))
+                            else:
+                                num_tokens += len(tokenizer.encode(t_key, add_special_tokens=False))
+                                num_tokens += len(tokenizer.encode(t_value, add_special_tokens=False))
+                else:
+                    num_tokens += len(tokenizer.encode(str(value), add_special_tokens=False))
+        num_tokens += tokens_suffix
+
+        if tools:
+            num_tokens += self._num_tokens_for_tools(tokenizer, tools)
+
+        return num_tokens
+
+    def _num_tokens_for_tools(self, tokenizer: Tokenizer, tools: list[PromptMessageTool]) -> int:
+        """
+        Calculate num tokens for tool calling with upstage tokenizer.
+
+        :param tokenizer: huggingface tokenizer
+        :param tools: tools for tool calling
+        :return: number of tokens
+        """
+        num_tokens = 0
+        for tool in tools:
+            num_tokens += len(tokenizer.encode('type'))
+            num_tokens += len(tokenizer.encode('function'))
+
+            # calculate num tokens for function object
+            num_tokens += len(tokenizer.encode('name'))
+            num_tokens += len(tokenizer.encode(tool.name))
+            num_tokens += len(tokenizer.encode('description'))
+            num_tokens += len(tokenizer.encode(tool.description))
+            parameters = tool.parameters
+            num_tokens += len(tokenizer.encode('parameters'))
+            if 'title' in parameters:
+                num_tokens += len(tokenizer.encode('title'))
+                num_tokens += len(tokenizer.encode(parameters.get("title")))
+            num_tokens += len(tokenizer.encode('type'))
+            num_tokens += len(tokenizer.encode(parameters.get("type")))
+            if 'properties' in parameters:
+                num_tokens += len(tokenizer.encode('properties'))
+                for key, value in parameters.get('properties').items():
+                    num_tokens += len(tokenizer.encode(key))
+                    for field_key, field_value in value.items():
+                        num_tokens += len(tokenizer.encode(field_key))
+                        if field_key == 'enum':
+                            for enum_field in field_value:
+                                num_tokens += 3
+                                num_tokens += len(tokenizer.encode(enum_field))
+                        else:
+                            num_tokens += len(tokenizer.encode(field_key))
+                            num_tokens += len(tokenizer.encode(str(field_value)))
+            if 'required' in parameters:
+                num_tokens += len(tokenizer.encode('required'))
+                for required_field in parameters['required']:
+                    num_tokens += 3
+                    num_tokens += len(tokenizer.encode(required_field))
+
+        return num_tokens
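Since Solar is not covered by tiktoken, token counting above goes through the public Hugging Face tokenizer. A rough standalone sketch of the same accounting, using the per-message overhead constants assumed in `_num_tokens_from_messages` (1 prefix token, 5 per message, 3 for the assistant suffix):

```python
from tokenizers import Tokenizer

# Published at https://huggingface.co/upstage/solar-1-mini-tokenizer
tokenizer = Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")

messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "Hello World!"},
]

# <|startoftext|> prefix plus the trailing "<|im_start|>assistant\n" suffix.
num_tokens = 1 + 3
for message in messages:
    num_tokens += 5  # <|im_start|>{role}\n{message}<|im_end|> wrapper
    for value in message.values():
        num_tokens += len(tokenizer.encode(str(value), add_special_tokens=False))

print(num_tokens)
```

This mirrors how `test_get_num_tokens` further down arrives at its expected count of 13 tokens for a single "Hello World!" user message.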

+ 43 - 0
api/core/model_runtime/model_providers/upstage/llm/solar-1-mini-chat.yaml

@@ -0,0 +1,43 @@
+model: solar-1-mini-chat
+label:
+  zh_Hans: solar-1-mini-chat
+  en_US: solar-1-mini-chat
+  ko_KR: solar-1-mini-chat
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: max_tokens
+    use_template: max_tokens
+    default: 512
+    min: 1
+    max: 32768
+  - name: seed
+    label:
+      zh_Hans: 种子
+      en_US: Seed
+    type: int
+    help:
+      zh_Hans:
+        如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint
+        响应参数来监视变化。
+      en_US:
+        If specified, model will make a best effort to sample deterministically,
+        such that repeated requests with the same seed and parameters should return
+        the same result. Determinism is not guaranteed, and you should refer to the
+        system_fingerprint response parameter to monitor changes in the backend.
+    required: false
+pricing:
+  input: "0.5"
+  output: "0.5"
+  unit: "0.000001"
+  currency: USD

+ 0 - 0
api/core/model_runtime/model_providers/upstage/text_embedding/__init__.py


+ 9 - 0
api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-passage.yaml

@@ -0,0 +1,9 @@
+model: solar-embedding-1-large-passage
+model_type: text-embedding
+model_properties:
+  context_size: 4000
+  max_chunks: 32
+pricing:
+  input: '0.1'
+  unit: '0.000001'
+  currency: 'USD'

+ 9 - 0
api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-query.yaml

@@ -0,0 +1,9 @@
+model: solar-embedding-1-large-query
+model_type: text-embedding
+model_properties:
+  context_size: 4000
+  max_chunks: 32
+pricing:
+  input: '0.1'
+  unit: '0.000001'
+  currency: 'USD'

+ 195 - 0
api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py

@@ -0,0 +1,195 @@
+import base64
+import time
+from collections.abc import Mapping
+from typing import Union
+
+import numpy as np
+from openai import OpenAI
+from tokenizers import Tokenizer
+
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+from core.model_runtime.model_providers.upstage._common import _CommonUpstage
+
+
+class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel):
+    """
+    Model class for Upstage text embedding model.
+    """
+    def _get_tokenizer(self) -> Tokenizer:
+        return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
+
+    def _invoke(self, model: str, credentials: dict, texts: list[str], user: str | None = None) -> TextEmbeddingResult:
+        """
+        Invoke text embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param user: unique user id
+        :return: embeddings result
+        """
+
+        credentials_kwargs = self._to_credential_kwargs(credentials)
+        client = OpenAI(**credentials_kwargs)
+
+        extra_model_kwargs = {}
+        if user:
+            extra_model_kwargs["user"] = user
+        extra_model_kwargs["encoding_format"] = "base64"
+
+        context_size = self._get_context_size(model, credentials)
+        max_chunks = self._get_max_chunks(model, credentials)
+
+        embeddings: list[list[float]] = [[] for _ in range(len(texts))]
+        tokens = []
+        indices = []
+        used_tokens = 0
+
+        tokenizer = self._get_tokenizer()
+
+        for i, text in enumerate(texts):
+            token = tokenizer.encode(text, add_special_tokens=False).tokens
+            for j in range(0, len(token), context_size):
+                tokens += [token[j:j+context_size]]
+                indices += [i]
+        
+        batched_embeddings = []
+        _iter = range(0, len(tokens), max_chunks)
+
+        for i in _iter:
+            embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+                model=model,
+                client=client,
+                texts=tokens[i:i+max_chunks],
+                extra_model_kwargs=extra_model_kwargs,
+            )
+
+            used_tokens += embedding_used_tokens
+            batched_embeddings += embeddings_batch
+        
+        results: list[list[list[float]]] = [[] for _ in range(len(texts))]
+        num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
+
+        for i in range(len(indices)):
+            results[indices[i]].append(batched_embeddings[i])
+            num_tokens_in_batch[indices[i]].append(len(tokens[i]))
+        
+        for i in range(len(texts)):
+            _result = results[i]
+            if len(_result) == 0:
+                embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+                    model=model,
+                    client=client,
+                    texts=[texts[i]],
+                    extra_model_kwargs=extra_model_kwargs,
+                )
+                used_tokens += embedding_used_tokens
+                average = embeddings_batch[0]
+            else:
+                average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
+            embeddings[i] = (average / np.linalg.norm(average)).tolist()
+        
+        usage = self._calc_response_usage(
+            model=model,
+            credentials=credentials,
+            tokens=used_tokens
+        )
+
+        return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
+    
+    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+        """
+        Get number of tokens for the given texts
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :return:
+        """
+        if len(texts) == 0:
+            return 0
+
+        tokenizer = self._get_tokenizer()
+
+        total_num_tokens = 0
+        for text in texts:
+            # calculate the number of tokens in the encoded text
+            tokenized_text = tokenizer.encode(text)
+            total_num_tokens += len(tokenized_text)
+
+        return total_num_tokens
+    
+    def validate_credentials(self, model: str, credentials: Mapping) -> None:
+        """
+        Validate model credentials
+
+        :param model: model name
+        :param credentials: model credentials
+        :return:
+        """
+        try:
+            # transform credentials to kwargs for model instance
+            credentials_kwargs = self._to_credential_kwargs(credentials)
+            client = OpenAI(**credentials_kwargs)
+
+            # call embedding model
+            self._embedding_invoke(
+                model=model,
+                client=client,
+                texts=['ping'],
+                extra_model_kwargs={}
+            )
+        except Exception as ex:
+            raise CredentialsValidateFailedError(str(ex))
+    
+    def _embedding_invoke(self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict) -> tuple[list[list[float]], int]:
+        """
+        Invoke embedding model
+        :param model: model name
+        :param client: model client
+        :param texts: texts to embed
+        :param extra_model_kwargs: extra model kwargs
+        :return: embeddings and used tokens
+        """
+        response = client.embeddings.create(
+            model=model,
+            input=texts,
+            **extra_model_kwargs
+        )
+
+        if 'encoding_format' in extra_model_kwargs and extra_model_kwargs['encoding_format'] == 'base64':
+            return ([list(np.frombuffer(base64.b64decode(embedding.embedding), dtype=np.float32)) for embedding in response.data], response.usage.total_tokens)
+        
+        return [data.embedding for data in response.data], response.usage.total_tokens
+    
+    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+        """
+        Calculate response usage
+
+        :param model: model name
+        :param credentials: model credentials
+        :param tokens: input tokens
+        :return: usage
+        """
+        input_price_info = self.get_price(
+            model=model,
+            credentials=credentials,
+            tokens=tokens,
+            price_type=PriceType.INPUT
+        )
+
+        usage = EmbeddingUsage(
+            tokens=tokens,
+            total_tokens=tokens,
+            unit_price=input_price_info.unit_price,
+            price_unit=input_price_info.unit,
+            total_price=input_price_info.total_amount,
+            currency=input_price_info.currency,
+            latency=time.perf_counter() - self.started_at
+        )
+
+        return usage
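The embedding path above slices any over-long input into `context_size`-token windows, embeds them in batches of at most `max_chunks`, then recombines each text's chunk vectors with a token-weighted average followed by L2 normalization. A small numpy sketch of just that recombination step, with made-up chunk vectors and token counts:

```python
import numpy as np

# Hypothetical per-chunk embeddings for one long input text,
# weighted by the number of tokens in each chunk.
chunk_embeddings = [np.array([0.1, 0.3, 0.5]), np.array([0.2, 0.1, 0.4])]
chunk_token_counts = [4000, 1250]

# Token-weighted average, then L2-normalize, as in _invoke() above.
average = np.average(chunk_embeddings, axis=0, weights=chunk_token_counts)
embedding = (average / np.linalg.norm(average)).tolist()

print(embedding)
```

Note that `encoding_format` is forced to `base64`, so `_embedding_invoke` decodes each returned embedding with `np.frombuffer(base64.b64decode(...), dtype=np.float32)` before any of this averaging happens.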

+ 32 - 0
api/core/model_runtime/model_providers/upstage/upstage.py

@@ -0,0 +1,32 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class UpstageProvider(ModelProvider):
+    
+    def validate_provider_credentials(self, credentials: dict) -> None:
+        """
+        Validate provider credentials
+        if validate failed, raise exception
+
+        :param credentials: provider credentials, as defined in `provider_credential_schema`.
+        """
+        try:
+            model_instance = self.get_model_instance(ModelType.LLM)
+
+            model_instance.validate_credentials(
+                model="solar-1-mini-chat",
+                credentials=credentials
+            )
+        except CredentialsValidateFailedError as e:
+            logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
+            raise e
+        except Exception as e:
+            logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
+            raise e
+                

+ 49 - 0
api/core/model_runtime/model_providers/upstage/upstage.yaml

@@ -0,0 +1,49 @@
+provider: upstage
+label:
+  en_US: Upstage
+description:
+  en_US: Models provided by Upstage, such as Solar-1-mini-chat.
+  zh_Hans: Upstage 提供的模型,例如 Solar-1-mini-chat.
+icon_small:
+  en_US: icon_s_en.svg
+icon_large:
+  en_US: icon_l_en.svg
+background: "#FFFFFF"
+help:
+  title:
+    en_US: Get your API Key from Upstage
+    zh_Hans: 从 Upstage 获取 API Key
+  url:
+    en_US: https://console.upstage.ai/api-keys
+supported_model_types:
+  - llm
+  - text-embedding
+configurate_methods:
+  - predefined-model
+model_credential_schema:
+  model:
+    label:
+      en_US: Model Name
+      zh_Hans: 模型名称
+    placeholder:
+      en_US: Enter your model name
+      zh_Hans: 输入模型名称
+  credential_form_schemas:
+    - variable: upstage_api_key
+      label:
+        en_US: API Key
+      type: secret-input
+      required: true
+      placeholder:
+        zh_Hans: 在此输入您的 API Key
+        en_US: Enter your API Key
+provider_credential_schema:
+  credential_form_schemas:
+    - variable: upstage_api_key
+      label:
+        en_US: API Key
+      type: secret-input
+      required: true
+      placeholder:
+        zh_Hans: 在此输入您的 API Key
+        en_US: Enter your API Key
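The `upstage_api_key` variable declared in both credential schemas above is the same key that `_to_credential_kwargs` reads in `_common.py`, so provider-level validation boils down to roughly the following (essentially what `test_provider.py` further down exercises; the environment variable is only an example source for the key):

```python
import os

from core.model_runtime.model_providers.upstage.upstage import UpstageProvider

provider = UpstageProvider()
# The credentials dict keys must match the `variable` names in upstage.yaml.
provider.validate_provider_credentials(
    credentials={"upstage_api_key": os.environ.get("UPSTAGE_API_KEY")}
)
```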

+ 1 - 1
api/docker/entrypoint.sh

@@ -4,7 +4,7 @@ set -e
 
 if [[ "${MIGRATION_ENABLED}" == "true" ]]; then
   echo "Running migrations"
-  flask upgrade-db
+  flask db upgrade
 fi
 
 if [[ "${MODE}" == "worker" ]]; then

+ 1 - 0
api/pyproject.toml

@@ -73,6 +73,7 @@ quote-style = "single"
 
 [tool.pytest_env]
 OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii"
+UPSTAGE_API_KEY = "up-aaaaaaaaaaaaaaaaaaaa"
 AZURE_OPENAI_API_BASE = "https://difyai-openai.openai.azure.com"
 AZURE_OPENAI_API_KEY = "xxxxb1707exxxxxxxxxxaaxxxxxf94"
 ANTHROPIC_API_KEY = "sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz"

+ 0 - 0
api/tests/integration_tests/model_runtime/upstage/__init__.py


+ 245 - 0
api/tests/integration_tests/model_runtime/upstage/test_llm.py

@@ -0,0 +1,245 @@
+import os
+from collections.abc import Generator
+
+import pytest
+
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    PromptMessageTool,
+    SystemPromptMessage,
+    UserPromptMessage,
+)
+from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
+from core.model_runtime.model_providers.upstage.llm.llm import UpstageLargeLanguageModel
+
+"""FOR MOCK FIXTURES, DO NOT REMOVE"""
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+def test_predefined_models():
+    model = UpstageLargeLanguageModel()
+    model_schemas = model.predefined_models()
+
+    assert len(model_schemas) >= 1
+    assert isinstance(model_schemas[0], AIModelEntity)
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_validate_credentials_for_chat_model(setup_openai_mock):
+    model = UpstageLargeLanguageModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        # model name is set to gpt-3.5-turbo because of mocking
+        model.validate_credentials(
+            model='gpt-3.5-turbo',
+            credentials={
+                'upstage_api_key': 'invalid_key'
+            }
+        )
+
+    model.validate_credentials(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        }
+    )
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_invoke_chat_model(setup_openai_mock):
+    model = UpstageLargeLanguageModel()
+
+    result = model.invoke(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'top_p': 1.0,
+            'presence_penalty': 0.0,
+            'frequency_penalty': 0.0,
+            'max_tokens': 10
+        },
+        stop=['How'],
+        stream=False,
+        user="abc-123"
+    )
+
+    assert isinstance(result, LLMResult)
+    assert len(result.message.content) > 0
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_invoke_chat_model_with_tools(setup_openai_mock):
+    model = UpstageLargeLanguageModel()
+
+    result = model.invoke(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content="what's the weather today in London?",
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'max_tokens': 100
+        },
+        tools=[
+            PromptMessageTool(
+                name='get_weather',
+                description='Determine weather in my location',
+                parameters={
+                    "type": "object",
+                    "properties": {
+                      "location": {
+                        "type": "string",
+                        "description": "The city and state e.g. San Francisco, CA"
+                      },
+                      "unit": {
+                        "type": "string",
+                        "enum": [
+                          "c",
+                          "f"
+                        ]
+                      }
+                    },
+                    "required": [
+                      "location"
+                    ]
+                  }
+            ),
+            PromptMessageTool(
+                name='get_stock_price',
+                description='Get the current stock price',
+                parameters={
+                    "type": "object",
+                    "properties": {
+                      "symbol": {
+                        "type": "string",
+                        "description": "The stock symbol"
+                      }
+                    },
+                    "required": [
+                      "symbol"
+                    ]
+                  }
+            )
+        ],
+        stream=False,
+        user="abc-123"
+    )
+
+    assert isinstance(result, LLMResult)
+    assert isinstance(result.message, AssistantPromptMessage)
+    assert len(result.message.tool_calls) > 0
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_invoke_stream_chat_model(setup_openai_mock):
+    model = UpstageLargeLanguageModel()
+
+    result = model.invoke(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'max_tokens': 100
+        },
+        stream=True,
+        user="abc-123"
+    )
+
+    assert isinstance(result, Generator)
+
+    for chunk in result:
+        assert isinstance(chunk, LLMResultChunk)
+        assert isinstance(chunk.delta, LLMResultChunkDelta)
+        assert isinstance(chunk.delta.message, AssistantPromptMessage)
+        assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
+        if chunk.delta.finish_reason is not None:
+            assert chunk.delta.usage is not None
+            assert chunk.delta.usage.completion_tokens > 0
+
+
+def test_get_num_tokens():
+    model = UpstageLargeLanguageModel()
+
+    num_tokens = model.get_num_tokens(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        },
+        prompt_messages=[
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ]
+    )
+
+    assert num_tokens == 13
+
+    num_tokens = model.get_num_tokens(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        tools=[
+            PromptMessageTool(
+                name='get_weather',
+                description='Determine weather in my location',
+                parameters={
+                    "type": "object",
+                    "properties": {
+                      "location": {
+                        "type": "string",
+                        "description": "The city and state e.g. San Francisco, CA"
+                      },
+                      "unit": {
+                        "type": "string",
+                        "enum": [
+                          "c",
+                          "f"
+                        ]
+                      }
+                    },
+                    "required": [
+                      "location"
+                    ]
+                }
+            ),
+        ]
+    )
+
+    assert num_tokens == 106

+ 23 - 0
api/tests/integration_tests/model_runtime/upstage/test_provider.py

@@ -0,0 +1,23 @@
+import os
+
+import pytest
+
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.upstage.upstage import UpstageProvider
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_validate_provider_credentials(setup_openai_mock):
+    provider = UpstageProvider()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        provider.validate_provider_credentials(
+            credentials={}
+        )
+
+    provider.validate_provider_credentials(
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        }
+    )

+ 67 - 0
api/tests/integration_tests/model_runtime/upstage/test_text_embedding.py

@@ -0,0 +1,67 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.upstage.text_embedding.text_embedding import UpstageTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True)
+def test_validate_credentials(setup_openai_mock):
+    model = UpstageTextEmbeddingModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        model.validate_credentials(
+            model='solar-embedding-1-large-passage',
+            credentials={
+                'upstage_api_key': 'invalid_key'
+            }
+        )
+
+    model.validate_credentials(
+        model='solar-embedding-1-large-passage',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        }
+    )
+
+@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True)
+def test_invoke_model(setup_openai_mock):
+    model = UpstageTextEmbeddingModel()
+
+    result = model.invoke(
+        model='solar-embedding-1-large-passage',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY'),
+        },
+        texts=[
+            "hello",
+            "world",
+            " ".join(["long_text"] * 100),
+            " ".join(["another_long_text"] * 100)
+        ],
+        user="abc-123"
+    )
+
+    assert isinstance(result, TextEmbeddingResult)
+    assert len(result.embeddings) == 4
+    assert result.usage.total_tokens == 2
+
+
+def test_get_num_tokens():
+    model = UpstageTextEmbeddingModel()
+
+    num_tokens = model.get_num_tokens(
+        model='solar-embedding-1-large-passage',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY'),
+        },
+        texts=[
+            "hello",
+            "world"
+        ]
+    )
+
+    assert num_tokens == 5

+ 3 - 1
dev/pytest/pytest_model_runtime.sh

@@ -5,4 +5,6 @@ pytest api/tests/integration_tests/model_runtime/anthropic \
   api/tests/integration_tests/model_runtime/azure_openai \
   api/tests/integration_tests/model_runtime/openai api/tests/integration_tests/model_runtime/chatglm \
   api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \
-  api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py
+  api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \
+  api/tests/integration_tests/model_runtime/upstage
+