@@ -246,11 +246,10 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
246
246
if hasattr (request , 'json' ):
247
247
data = request .get_json ()
248
248
messages = data .get ('messages' , [])
249
- # Copy all parameters except 'model' and 'messages'
249
+ # Copy all parameters except 'stream', 'model', 'n' and 'messages'
250
250
kwargs = {k : v for k , v in data .items ()
251
- if k not in ['model' , 'messages' , 'optillm_approach' ]}
251
+ if k not in ['model' , 'messages' , 'stream' , 'n' , ' optillm_approach' ]}
252
252
response = none_approach (original_messages = messages , client = client , model = model , ** kwargs )
253
-
254
253
# For none approach, we return the response and a token count of 0
255
254
# since the full token count is already in the response
256
255
return response , 0
@@ -369,6 +368,21 @@ def generate_streaming_response(final_response, model):
369
368
# Yield the final message to indicate the stream has ended
370
369
yield "data: [DONE]\n \n "
371
370
371
def extract_contents(response_obj):
    """Collect the first-choice message content from one or more responses.

    Args:
        response_obj: A single chat-completion-style response mapping, or a
            list of such mappings. Each is expected to look like
            ``{'choices': [{'message': {'content': ...}}, ...]}``.

    Returns:
        A list with the ``content`` of the first choice of every response
        that has a non-empty one, in input order. Responses that are
        malformed (not a mapping, no choices, no message, empty content)
        are skipped rather than raising.
    """
    # Local import keeps this block self-contained.
    from collections.abc import Mapping

    # Handle both a single response and a list of responses.
    responses = response_obj if isinstance(response_obj, list) else [response_obj]

    contents = []
    for response in responses:
        # Skip None / plain strings / anything without mapping semantics
        # instead of failing on ``.get``.
        if not isinstance(response, Mapping):
            continue

        choices = response.get('choices')
        if not isinstance(choices, (list, tuple)) or not choices:
            continue

        # Only the first choice is used.
        first_choice = choices[0]
        if not isinstance(first_choice, Mapping):
            continue

        message = first_choice.get('message')
        if not isinstance(message, Mapping):
            continue

        content = message.get('content')
        if content:
            contents.append(content)

    return contents
385
+
372
386
def parse_conversation (messages ):
373
387
system_prompt = ""
374
388
conversation = []
@@ -523,8 +537,13 @@ def proxy():
523
537
result = responses
524
538
else :
525
539
result , completion_tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model )
540
+
526
541
logger .debug (f'Direct proxy response: { result } ' )
527
- return jsonify (result ), 200
542
+
543
+ if stream :
544
+ return Response (generate_streaming_response (extract_contents (result ), model ), content_type = 'text/event-stream' )
545
+ else :
546
+ return jsonify (result ), 200
528
547
529
548
elif operation == 'AND' or operation == 'OR' :
530
549
if contains_none :
@@ -545,7 +564,7 @@ def proxy():
545
564
messages = tagged_conversation_to_messages (response )
546
565
if messages : # Only take the last message if we have any
547
566
response = messages [- 1 ]['content' ]
548
-
567
+
549
568
if stream :
550
569
return Response (generate_streaming_response (response , model ), content_type = 'text/event-stream' )
551
570
else :
0 commit comments