# parser.py (15 KB)

import re
import uuid
from collections import Counter
from json import dumps as json_dumps
from json import loads as json_loads
from json.decoder import JSONDecodeError

from requests import get
from yaml import YAMLError, safe_load

from core.tools.entities.common_entities import I18nObject
from core.tools.entities.tool_bundle import ApiToolBundle
from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter
from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError


  12. class ApiBasedToolSchemaParser:
  13. @staticmethod
  14. def parse_openapi_to_tool_bundle(
  15. openapi: dict, extra_info: dict | None = None, warning: dict | None = None
  16. ) -> list[ApiToolBundle]:
  17. warning = warning if warning is not None else {}
  18. extra_info = extra_info if extra_info is not None else {}
  19. # set description to extra_info
  20. extra_info["description"] = openapi["info"].get("description", "")
  21. if len(openapi["servers"]) == 0:
  22. raise ToolProviderNotFoundError("No server found in the openapi yaml.")
  23. server_url = openapi["servers"][0]["url"]
  24. # list all interfaces
  25. interfaces = []
  26. for path, path_item in openapi["paths"].items():
  27. methods = ["get", "post", "put", "delete", "patch", "head", "options", "trace"]
  28. for method in methods:
  29. if method in path_item:
  30. interfaces.append(
  31. {
  32. "path": path,
  33. "method": method,
  34. "operation": path_item[method],
  35. }
  36. )
  37. # get all parameters
  38. bundles = []
  39. for interface in interfaces:
  40. # convert parameters
  41. parameters = []
  42. if "parameters" in interface["operation"]:
  43. for parameter in interface["operation"]["parameters"]:
  44. tool_parameter = ToolParameter(
  45. name=parameter["name"],
  46. label=I18nObject(en_US=parameter["name"], zh_Hans=parameter["name"]),
  47. human_description=I18nObject(
  48. en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
  49. ),
  50. type=ToolParameter.ToolParameterType.STRING,
  51. required=parameter.get("required", False),
  52. form=ToolParameter.ToolParameterForm.LLM,
  53. llm_description=parameter.get("description"),
  54. default=parameter["schema"]["default"]
  55. if "schema" in parameter and "default" in parameter["schema"]
  56. else None,
  57. )
  58. # check if there is a type
  59. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(parameter)
  60. if typ:
  61. tool_parameter.type = typ
  62. parameters.append(tool_parameter)
  63. # create tool bundle
  64. # check if there is a request body
  65. if "requestBody" in interface["operation"]:
  66. request_body = interface["operation"]["requestBody"]
  67. if "content" in request_body:
  68. for content_type, content in request_body["content"].items():
  69. # if there is a reference, get the reference and overwrite the content
  70. if "schema" not in content:
  71. continue
  72. if "$ref" in content["schema"]:
  73. # get the reference
  74. root = openapi
  75. reference = content["schema"]["$ref"].split("/")[1:]
  76. for ref in reference:
  77. root = root[ref]
  78. # overwrite the content
  79. interface["operation"]["requestBody"]["content"][content_type]["schema"] = root
  80. # parse body parameters
  81. if "schema" in interface["operation"]["requestBody"]["content"][content_type]:
  82. body_schema = interface["operation"]["requestBody"]["content"][content_type]["schema"]
  83. required = body_schema.get("required", [])
  84. properties = body_schema.get("properties", {})
  85. for name, property in properties.items():
  86. tool = ToolParameter(
  87. name=name,
  88. label=I18nObject(en_US=name, zh_Hans=name),
  89. human_description=I18nObject(
  90. en_US=property.get("description", ""), zh_Hans=property.get("description", "")
  91. ),
  92. type=ToolParameter.ToolParameterType.STRING,
  93. required=name in required,
  94. form=ToolParameter.ToolParameterForm.LLM,
  95. llm_description=property.get("description", ""),
  96. default=property.get("default", None),
  97. )
  98. # check if there is a type
  99. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(property)
  100. if typ:
  101. tool.type = typ
  102. parameters.append(tool)
  103. # check if parameters is duplicated
  104. parameters_count = {}
  105. for parameter in parameters:
  106. if parameter.name not in parameters_count:
  107. parameters_count[parameter.name] = 0
  108. parameters_count[parameter.name] += 1
  109. for name, count in parameters_count.items():
  110. if count > 1:
  111. warning["duplicated_parameter"] = f"Parameter {name} is duplicated."
  112. # check if there is a operation id, use $path_$method as operation id if not
  113. if "operationId" not in interface["operation"]:
  114. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  115. path = interface["path"]
  116. if interface["path"].startswith("/"):
  117. path = interface["path"][1:]
  118. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  119. path = re.sub(r"[^a-zA-Z0-9_-]", "", path)
  120. if not path:
  121. path = str(uuid.uuid4())
  122. interface["operation"]["operationId"] = f'{path}_{interface["method"]}'
  123. bundles.append(
  124. ApiToolBundle(
  125. server_url=server_url + interface["path"],
  126. method=interface["method"],
  127. summary=interface["operation"]["description"]
  128. if "description" in interface["operation"]
  129. else interface["operation"].get("summary", None),
  130. operation_id=interface["operation"]["operationId"],
  131. parameters=parameters,
  132. author="",
  133. icon=None,
  134. openapi=interface["operation"],
  135. )
  136. )
  137. return bundles
  138. @staticmethod
  139. def _get_tool_parameter_type(parameter: dict) -> ToolParameter.ToolParameterType:
  140. parameter = parameter or {}
  141. typ = None
  142. if parameter.get("format") == "binary":
  143. return ToolParameter.ToolParameterType.FILE
  144. if "type" in parameter:
  145. typ = parameter["type"]
  146. elif "schema" in parameter and "type" in parameter["schema"]:
  147. typ = parameter["schema"]["type"]
  148. if typ in {"integer", "number"}:
  149. return ToolParameter.ToolParameterType.NUMBER
  150. elif typ == "boolean":
  151. return ToolParameter.ToolParameterType.BOOLEAN
  152. elif typ == "string":
  153. return ToolParameter.ToolParameterType.STRING
  154. @staticmethod
  155. def parse_openapi_yaml_to_tool_bundle(
  156. yaml: str, extra_info: dict | None = None, warning: dict | None = None
  157. ) -> list[ApiToolBundle]:
  158. """
  159. parse openapi yaml to tool bundle
  160. :param yaml: the yaml string
  161. :return: the tool bundle
  162. """
  163. warning = warning if warning is not None else {}
  164. extra_info = extra_info if extra_info is not None else {}
  165. openapi: dict = safe_load(yaml)
  166. if openapi is None:
  167. raise ToolApiSchemaError("Invalid openapi yaml.")
  168. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning)
  169. @staticmethod
  170. def parse_swagger_to_openapi(swagger: dict, extra_info: dict | None = None, warning: dict | None = None) -> dict:
  171. warning = warning or {}
  172. """
  173. parse swagger to openapi
  174. :param swagger: the swagger dict
  175. :return: the openapi dict
  176. """
  177. # convert swagger to openapi
  178. info = swagger.get("info", {"title": "Swagger", "description": "Swagger", "version": "1.0.0"})
  179. servers = swagger.get("servers", [])
  180. if len(servers) == 0:
  181. raise ToolApiSchemaError("No server found in the swagger yaml.")
  182. openapi = {
  183. "openapi": "3.0.0",
  184. "info": {
  185. "title": info.get("title", "Swagger"),
  186. "description": info.get("description", "Swagger"),
  187. "version": info.get("version", "1.0.0"),
  188. },
  189. "servers": swagger["servers"],
  190. "paths": {},
  191. "components": {"schemas": {}},
  192. }
  193. # check paths
  194. if "paths" not in swagger or len(swagger["paths"]) == 0:
  195. raise ToolApiSchemaError("No paths found in the swagger yaml.")
  196. # convert paths
  197. for path, path_item in swagger["paths"].items():
  198. openapi["paths"][path] = {}
  199. for method, operation in path_item.items():
  200. if "operationId" not in operation:
  201. raise ToolApiSchemaError(f"No operationId found in operation {method} {path}.")
  202. if ("summary" not in operation or len(operation["summary"]) == 0) and (
  203. "description" not in operation or len(operation["description"]) == 0
  204. ):
  205. warning["missing_summary"] = f"No summary or description found in operation {method} {path}."
  206. openapi["paths"][path][method] = {
  207. "operationId": operation["operationId"],
  208. "summary": operation.get("summary", ""),
  209. "description": operation.get("description", ""),
  210. "parameters": operation.get("parameters", []),
  211. "responses": operation.get("responses", {}),
  212. }
  213. if "requestBody" in operation:
  214. openapi["paths"][path][method]["requestBody"] = operation["requestBody"]
  215. # convert definitions
  216. for name, definition in swagger["definitions"].items():
  217. openapi["components"]["schemas"][name] = definition
  218. return openapi
  219. @staticmethod
  220. def parse_openai_plugin_json_to_tool_bundle(
  221. json: str, extra_info: dict | None = None, warning: dict | None = None
  222. ) -> list[ApiToolBundle]:
  223. """
  224. parse openapi plugin yaml to tool bundle
  225. :param json: the json string
  226. :return: the tool bundle
  227. """
  228. warning = warning if warning is not None else {}
  229. extra_info = extra_info if extra_info is not None else {}
  230. try:
  231. openai_plugin = json_loads(json)
  232. api = openai_plugin["api"]
  233. api_url = api["url"]
  234. api_type = api["type"]
  235. except:
  236. raise ToolProviderNotFoundError("Invalid openai plugin json.")
  237. if api_type != "openapi":
  238. raise ToolNotSupportedError("Only openapi is supported now.")
  239. # get openapi yaml
  240. response = get(api_url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "}, timeout=5)
  241. if response.status_code != 200:
  242. raise ToolProviderNotFoundError("cannot get openapi yaml from url.")
  243. return ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle(
  244. response.text, extra_info=extra_info, warning=warning
  245. )
  246. @staticmethod
  247. def auto_parse_to_tool_bundle(
  248. content: str, extra_info: dict | None = None, warning: dict | None = None
  249. ) -> tuple[list[ApiToolBundle], str]:
  250. """
  251. auto parse to tool bundle
  252. :param content: the content
  253. :return: tools bundle, schema_type
  254. """
  255. warning = warning if warning is not None else {}
  256. extra_info = extra_info if extra_info is not None else {}
  257. content = content.strip()
  258. loaded_content = None
  259. json_error = None
  260. yaml_error = None
  261. try:
  262. loaded_content = json_loads(content)
  263. except JSONDecodeError as e:
  264. json_error = e
  265. if loaded_content is None:
  266. try:
  267. loaded_content = safe_load(content)
  268. except YAMLError as e:
  269. yaml_error = e
  270. if loaded_content is None:
  271. raise ToolApiSchemaError(
  272. f"Invalid api schema, schema is neither json nor yaml. json error: {str(json_error)},"
  273. f" yaml error: {str(yaml_error)}"
  274. )
  275. swagger_error = None
  276. openapi_error = None
  277. openapi_plugin_error = None
  278. schema_type = None
  279. try:
  280. openapi = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
  281. loaded_content, extra_info=extra_info, warning=warning
  282. )
  283. schema_type = ApiProviderSchemaType.OPENAPI.value
  284. return openapi, schema_type
  285. except ToolApiSchemaError as e:
  286. openapi_error = e
  287. # openai parse error, fallback to swagger
  288. try:
  289. converted_swagger = ApiBasedToolSchemaParser.parse_swagger_to_openapi(
  290. loaded_content, extra_info=extra_info, warning=warning
  291. )
  292. schema_type = ApiProviderSchemaType.SWAGGER.value
  293. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
  294. converted_swagger, extra_info=extra_info, warning=warning
  295. ), schema_type
  296. except ToolApiSchemaError as e:
  297. swagger_error = e
  298. # swagger parse error, fallback to openai plugin
  299. try:
  300. openapi_plugin = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle(
  301. json_dumps(loaded_content), extra_info=extra_info, warning=warning
  302. )
  303. return openapi_plugin, ApiProviderSchemaType.OPENAI_PLUGIN.value
  304. except ToolNotSupportedError as e:
  305. # maybe it's not plugin at all
  306. openapi_plugin_error = e
  307. raise ToolApiSchemaError(
  308. f"Invalid api schema, openapi error: {str(openapi_error)}, swagger error: {str(swagger_error)},"
  309. f" openapi plugin error: {str(openapi_plugin_error)}"
  310. )