| 
					
				 | 
			
			
				@@ -197,8 +197,7 @@ class TongyiLargeLanguageModel(LargeLanguageModel): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # nothing different between chat model and completion model in tongyi 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             params["messages"] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            response = Generation.call(**params, result_format="message", stream=stream) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            response = Generation.call(**params, result_format="message", stream=stream, incremental_output=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if stream: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             return self._handle_generate_stream_response(model, credentials, response, prompt_messages) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -258,6 +257,9 @@ class TongyiLargeLanguageModel(LargeLanguageModel): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         full_text = "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         tool_calls = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        is_reasoning_started = False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # for index, response in enumerate(responses): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        index = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         for index, response in enumerate(responses): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if response.status_code not in {200, HTTPStatus.OK}: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 raise ServiceUnavailableError( 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -311,7 +313,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     ), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                resp_content = response.output.choices[0].message.content 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                message = response.output.choices[0].message 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                resp_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    message, is_reasoning_started 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 if not resp_content: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     if "tool_calls" in response.output.choices[0].message: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         tool_calls = response.output.choices[0].message["tool_calls"] 
			 |