test_llm.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. import os
  2. from typing import Generator
  3. import pytest
  4. from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
  5. from core.model_runtime.entities.message_entities import (AssistantPromptMessage, PromptMessageTool,
  6. SystemPromptMessage, TextPromptMessageContent,
  7. UserPromptMessage)
  8. from core.model_runtime.entities.model_entities import AIModelEntity
  9. from core.model_runtime.errors.validate import CredentialsValidateFailedError
  10. from core.model_runtime.model_providers.xinference.llm.llm import XinferenceAILargeLanguageModel
  11. """FOR MOCK FIXTURES, DO NOT REMOVE"""
  12. from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
  13. from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock
  14. @pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
  15. def test_validate_credentials_for_chat_model(setup_openai_mock, setup_xinference_mock):
  16. model = XinferenceAILargeLanguageModel()
  17. with pytest.raises(CredentialsValidateFailedError):
  18. model.validate_credentials(
  19. model='ChatGLM3',
  20. credentials={
  21. 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  22. 'model_uid': 'www ' + os.environ.get('XINFERENCE_CHAT_MODEL_UID')
  23. }
  24. )
  25. with pytest.raises(CredentialsValidateFailedError):
  26. model.validate_credentials(
  27. model='aaaaa',
  28. credentials={
  29. 'server_url': '',
  30. 'model_uid': ''
  31. }
  32. )
  33. model.validate_credentials(
  34. model='ChatGLM3',
  35. credentials={
  36. 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  37. 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
  38. }
  39. )
  40. @pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
  41. def test_invoke_chat_model(setup_openai_mock, setup_xinference_mock):
  42. model = XinferenceAILargeLanguageModel()
  43. response = model.invoke(
  44. model='ChatGLM3',
  45. credentials={
  46. 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  47. 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
  48. },
  49. prompt_messages=[
  50. SystemPromptMessage(
  51. content='You are a helpful AI assistant.',
  52. ),
  53. UserPromptMessage(
  54. content='Hello World!'
  55. )
  56. ],
  57. model_parameters={
  58. 'temperature': 0.7,
  59. 'top_p': 1.0,
  60. },
  61. stop=['you'],
  62. user="abc-123",
  63. stream=False
  64. )
  65. assert isinstance(response, LLMResult)
  66. assert len(response.message.content) > 0
  67. assert response.usage.total_tokens > 0
  68. @pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
  69. def test_invoke_stream_chat_model(setup_openai_mock, setup_xinference_mock):
  70. model = XinferenceAILargeLanguageModel()
  71. response = model.invoke(
  72. model='ChatGLM3',
  73. credentials={
  74. 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  75. 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
  76. },
  77. prompt_messages=[
  78. SystemPromptMessage(
  79. content='You are a helpful AI assistant.',
  80. ),
  81. UserPromptMessage(
  82. content='Hello World!'
  83. )
  84. ],
  85. model_parameters={
  86. 'temperature': 0.7,
  87. 'top_p': 1.0,
  88. },
  89. stop=['you'],
  90. stream=True,
  91. user="abc-123"
  92. )
  93. assert isinstance(response, Generator)
  94. for chunk in response:
  95. assert isinstance(chunk, LLMResultChunk)
  96. assert isinstance(chunk.delta, LLMResultChunkDelta)
  97. assert isinstance(chunk.delta.message, AssistantPromptMessage)
  98. assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
  99. """
  100. Function calling in Xinference does not currently support stream mode
  101. """
  102. # def test_invoke_stream_chat_model_with_functions():
  103. # model = XinferenceAILargeLanguageModel()
  104. # response = model.invoke(
  105. # model='ChatGLM3-6b',
  106. # credentials={
  107. # 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  108. # 'model_type': 'text-generation',
  109. # 'model_name': 'ChatGLM3',
  110. # 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
  111. # },
  112. # prompt_messages=[
  113. # SystemPromptMessage(
  114. # content='你是一个天气机器人,可以通过调用函数来获取天气信息',
  115. # ),
  116. # UserPromptMessage(
  117. # content='波士顿天气如何?'
  118. # )
  119. # ],
  120. # model_parameters={
  121. # 'temperature': 0,
  122. # 'top_p': 1.0,
  123. # },
  124. # stop=['you'],
  125. # user='abc-123',
  126. # stream=True,
  127. # tools=[
  128. # PromptMessageTool(
  129. # name='get_current_weather',
  130. # description='Get the current weather in a given location',
  131. # parameters={
  132. # "type": "object",
  133. # "properties": {
  134. # "location": {
  135. # "type": "string",
  136. # "description": "The city and state e.g. San Francisco, CA"
  137. # },
  138. # "unit": {
  139. # "type": "string",
  140. # "enum": ["celsius", "fahrenheit"]
  141. # }
  142. # },
  143. # "required": [
  144. # "location"
  145. # ]
  146. # }
  147. # )
  148. # ]
  149. # )
  150. # assert isinstance(response, Generator)
  151. # call: LLMResultChunk = None
  152. # chunks = []
  153. # for chunk in response:
  154. # chunks.append(chunk)
  155. # assert isinstance(chunk, LLMResultChunk)
  156. # assert isinstance(chunk.delta, LLMResultChunkDelta)
  157. # assert isinstance(chunk.delta.message, AssistantPromptMessage)
  158. # assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
  159. # if chunk.delta.message.tool_calls and len(chunk.delta.message.tool_calls) > 0:
  160. # call = chunk
  161. # break
  162. # assert call is not None
  163. # assert call.delta.message.tool_calls[0].function.name == 'get_current_weather'
  164. # def test_invoke_chat_model_with_functions():
  165. # model = XinferenceAILargeLanguageModel()
  166. # response = model.invoke(
  167. # model='ChatGLM3-6b',
  168. # credentials={
  169. # 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  170. # 'model_type': 'text-generation',
  171. # 'model_name': 'ChatGLM3',
  172. # 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
  173. # },
  174. # prompt_messages=[
  175. # UserPromptMessage(
  176. # content='What is the weather like in San Francisco?'
  177. # )
  178. # ],
  179. # model_parameters={
  180. # 'temperature': 0.7,
  181. # 'top_p': 1.0,
  182. # },
  183. # stop=['you'],
  184. # user='abc-123',
  185. # stream=False,
  186. # tools=[
  187. # PromptMessageTool(
  188. # name='get_current_weather',
  189. # description='Get the current weather in a given location',
  190. # parameters={
  191. # "type": "object",
  192. # "properties": {
  193. # "location": {
  194. # "type": "string",
  195. # "description": "The city and state e.g. San Francisco, CA"
  196. # },
  197. # "unit": {
  198. # "type": "string",
  199. # "enum": [
  200. # "c",
  201. # "f"
  202. # ]
  203. # }
  204. # },
  205. # "required": [
  206. # "location"
  207. # ]
  208. # }
  209. # )
  210. # ]
  211. # )
  212. # assert isinstance(response, LLMResult)
  213. # assert len(response.message.content) > 0
  214. # assert response.usage.total_tokens > 0
  215. # assert response.message.tool_calls[0].function.name == 'get_current_weather'
  216. @pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
  217. def test_validate_credentials_for_generation_model(setup_openai_mock, setup_xinference_mock):
  218. model = XinferenceAILargeLanguageModel()
  219. with pytest.raises(CredentialsValidateFailedError):
  220. model.validate_credentials(
  221. model='alapaca',
  222. credentials={
  223. 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  224. 'model_uid': 'www ' + os.environ.get('XINFERENCE_GENERATION_MODEL_UID')
  225. }
  226. )
  227. with pytest.raises(CredentialsValidateFailedError):
  228. model.validate_credentials(
  229. model='alapaca',
  230. credentials={
  231. 'server_url': '',
  232. 'model_uid': ''
  233. }
  234. )
  235. model.validate_credentials(
  236. model='alapaca',
  237. credentials={
  238. 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  239. 'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID')
  240. }
  241. )
  242. @pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
  243. def test_invoke_generation_model(setup_openai_mock, setup_xinference_mock):
  244. model = XinferenceAILargeLanguageModel()
  245. response = model.invoke(
  246. model='alapaca',
  247. credentials={
  248. 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  249. 'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID')
  250. },
  251. prompt_messages=[
  252. UserPromptMessage(
  253. content='the United States is'
  254. )
  255. ],
  256. model_parameters={
  257. 'temperature': 0.7,
  258. 'top_p': 1.0,
  259. },
  260. stop=['you'],
  261. user="abc-123",
  262. stream=False
  263. )
  264. assert isinstance(response, LLMResult)
  265. assert len(response.message.content) > 0
  266. assert response.usage.total_tokens > 0
  267. @pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
  268. def test_invoke_stream_generation_model(setup_openai_mock, setup_xinference_mock):
  269. model = XinferenceAILargeLanguageModel()
  270. response = model.invoke(
  271. model='alapaca',
  272. credentials={
  273. 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  274. 'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID')
  275. },
  276. prompt_messages=[
  277. UserPromptMessage(
  278. content='the United States is'
  279. )
  280. ],
  281. model_parameters={
  282. 'temperature': 0.7,
  283. 'top_p': 1.0,
  284. },
  285. stop=['you'],
  286. stream=True,
  287. user="abc-123"
  288. )
  289. assert isinstance(response, Generator)
  290. for chunk in response:
  291. assert isinstance(chunk, LLMResultChunk)
  292. assert isinstance(chunk.delta, LLMResultChunkDelta)
  293. assert isinstance(chunk.delta.message, AssistantPromptMessage)
  294. assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
  295. def test_get_num_tokens():
  296. model = XinferenceAILargeLanguageModel()
  297. num_tokens = model.get_num_tokens(
  298. model='ChatGLM3',
  299. credentials={
  300. 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  301. 'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID')
  302. },
  303. prompt_messages=[
  304. SystemPromptMessage(
  305. content='You are a helpful AI assistant.',
  306. ),
  307. UserPromptMessage(
  308. content='Hello World!'
  309. )
  310. ],
  311. tools=[
  312. PromptMessageTool(
  313. name='get_current_weather',
  314. description='Get the current weather in a given location',
  315. parameters={
  316. "type": "object",
  317. "properties": {
  318. "location": {
  319. "type": "string",
  320. "description": "The city and state e.g. San Francisco, CA"
  321. },
  322. "unit": {
  323. "type": "string",
  324. "enum": [
  325. "c",
  326. "f"
  327. ]
  328. }
  329. },
  330. "required": [
  331. "location"
  332. ]
  333. }
  334. )
  335. ]
  336. )
  337. assert isinstance(num_tokens, int)
  338. assert num_tokens == 77
  339. num_tokens = model.get_num_tokens(
  340. model='ChatGLM3',
  341. credentials={
  342. 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  343. 'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID')
  344. },
  345. prompt_messages=[
  346. SystemPromptMessage(
  347. content='You are a helpful AI assistant.',
  348. ),
  349. UserPromptMessage(
  350. content='Hello World!'
  351. )
  352. ],
  353. )
  354. assert isinstance(num_tokens, int)
  355. assert num_tokens == 21