```diff
@@ -615,19 +615,11 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
         prompt_messages = self._clear_illegal_prompt_messages(model, prompt_messages)
 
         # o1 compatibility
-        block_as_stream = False
         if model.startswith("o1"):
             if "max_tokens" in model_parameters:
                 model_parameters["max_completion_tokens"] = model_parameters["max_tokens"]
                 del model_parameters["max_tokens"]
 
-            if stream:
-                block_as_stream = True
-                stream = False
-
-                if "stream_options" in extra_model_kwargs:
-                    del extra_model_kwargs["stream_options"]
-
             if "stop" in extra_model_kwargs:
                 del extra_model_kwargs["stop"]
 
```
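The hunk above keeps the parameter remapping for o1-series models but drops the forced non-streaming path. The retained logic can be read as the following standalone function; this is a minimal sketch, not the repository's code: `normalize_o1_params` is a hypothetical name, and the two dicts stand in for the method's local `model_parameters` and `extra_model_kwargs`.

```python
from typing import Any


def normalize_o1_params(
    model: str,
    model_parameters: dict[str, Any],
    extra_model_kwargs: dict[str, Any],
) -> None:
    """Sketch of the o1 compatibility shim kept by this diff (hypothetical helper)."""
    if model.startswith("o1"):
        if "max_tokens" in model_parameters:
            # o1 models take max_completion_tokens instead of max_tokens,
            # so the same token budget is carried over under the new key.
            model_parameters["max_completion_tokens"] = model_parameters.pop("max_tokens")
        # o1 models reject stop sequences, so the kwarg is dropped in place.
        extra_model_kwargs.pop("stop", None)
```

For example, `normalize_o1_params("o1-mini", {"max_tokens": 1024}, {"stop": ["\n\n"]})` leaves the parameters as `{"max_completion_tokens": 1024}` and an empty kwargs dict.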
```diff
@@ -644,47 +636,7 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
         if stream:
             return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, tools)
 
-        block_result = self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools)
-
-        if block_as_stream:
-            return self._handle_chat_block_as_stream_response(block_result, prompt_messages, stop)
-
-        return block_result
-
-    def _handle_chat_block_as_stream_response(
-        self,
-        block_result: LLMResult,
-        prompt_messages: list[PromptMessage],
-        stop: Optional[list[str]] = None,
-    ) -> Generator[LLMResultChunk, None, None]:
-        """
-        Handle llm chat response
-
-        :param model: model name
-        :param credentials: credentials
-        :param response: response
-        :param prompt_messages: prompt messages
-        :param tools: tools for tool calling
-        :param stop: stop words
-        :return: llm response chunk generator
-        """
-        text = block_result.message.content
-        text = cast(str, text)
-
-        if stop:
-            text = self.enforce_stop_tokens(text, stop)
-
-        yield LLMResultChunk(
-            model=block_result.model,
-            prompt_messages=prompt_messages,
-            system_fingerprint=block_result.system_fingerprint,
-            delta=LLMResultChunkDelta(
-                index=0,
-                message=AssistantPromptMessage(content=text),
-                finish_reason="stop",
-                usage=block_result.usage,
-            ),
-        )
+        return self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools)
 
     def _handle_chat_generate_response(
         self,
```
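With the workaround gone, `stream=True` requests for o1 models go through the real streaming handler, presumably because the o1 API gained native streaming support. The deleted `_handle_chat_block_as_stream_response` had instead materialized a blocking result, applied stop sequences client-side, and re-emitted everything as a single terminal chunk. A stripped-down sketch of that removed pattern follows, with plain types and a hypothetical name in place of `LLMResultChunk` and friends:

```python
from collections.abc import Generator


def block_as_stream(text: str, stop: list[str] | None = None) -> Generator[str, None, None]:
    """Sketch of the removed fallback: emit a finished blocking result as a one-chunk stream."""
    if stop:
        # Cut at the earliest occurrence of any stop sequence, approximating
        # the client-side enforce_stop_tokens call in the deleted helper.
        indices = [i for s in stop if (i := text.find(s)) != -1]
        if indices:
            text = text[: min(indices)]
    # A single yield makes the generator look like a stream to callers.
    yield text
```

Callers that iterated such a "stream" saw exactly one chunk with `finish_reason="stop"`, which is why removing the fallback collapses the dispatch into a plain stream/non-stream branch.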