Skip to content

Commit be3e3cd

Browse files
authored
Merge pull request #1185: fixes for text-followup not working with qwen3-vl
2 parents 0093576 + 81ec1f1 commit be3e3cd

3 files changed

Lines changed: 16 additions & 8 deletions

File tree

preprocessors/text-followup/text-followup.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2021 IMAGE Project, Shared Reality Lab, McGill University
1+
# Copyright (c) 2025 IMAGE Project, Shared Reality Lab, McGill University
22
#
33
# This program is free software: you can redistribute it and/or modify
44
# it under the terms of the GNU Affero General Public License as
@@ -452,13 +452,16 @@ def followup():
452452
{"error": "Failed to process focus area on image"}
453453
), 500
454454

455+
# get followup prompt from env as an override if it exists
456+
followup_prompt = os.getenv('FOLLOWUP_PROMPT_OVERRIDE', FOLLOWUP_PROMPT)
457+
455458
if not focus:
456-
system_prompt = FOLLOWUP_PROMPT
459+
system_prompt = followup_prompt
457460
else:
458-
system_prompt = FOLLOWUP_PROMPT + FOLLOWUP_PROMPT_FOCUS
461+
system_prompt = followup_prompt + FOLLOWUP_PROMPT_FOCUS
459462

460463
system_message = {
461-
"role": "developer",
464+
"role": "system",
462465
"content": system_prompt
463466
}
464467

@@ -508,7 +511,9 @@ def followup():
508511

509512
followup_response_json = llm_client.chat_completion(
510513
prompt="", # Empty since we're using full messages via kwargs
511-
json_schema=FOLLOWUP_RESPONSE_SCHEMA,
514+
system_prompt=system_prompt,
515+
json_schema=None, # qwen3 wants json_object not rigid schema
516+
response_format={"type": "json_object"},
512517
temperature=0.0,
513518
messages=messages, # Pass full conversation history via kwargs
514519
parse_json=True,
@@ -518,7 +523,7 @@ def followup():
518523
if followup_response_json is None:
519524
logging.error("Failed to receive response from LLM.")
520525
return jsonify(
521-
{"error": "Failed to get graphic caption from LLM"}
526+
{"error": "Failed to receive response from LLM"}
522527
), 500
523528

524529
response_text, token_usage = followup_response_json

utils/llm/client.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def chat_completion(
121121
# Add system prompt if provided
122122
if system_prompt:
123123
messages.append(
124-
{"role": "developer",
124+
{"role": "system",
125125
"content": system_prompt}
126126
)
127127

@@ -140,6 +140,8 @@ def chat_completion(
140140

141141
messages.append({"role": "user", "content": user_content})
142142

143+
logging.pii(messages)
144+
143145
# Build API call parameters
144146
params = {
145147
"model": self.model,

utils/llm/prompts.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@
101101
"response_brief": "One sentence response to the user request.",
102102
"response_full": "Further details. Maximum three sentences."
103103
}
104-
104+
"""
105+
OLD_END_OF_FOLLOWUP_PROMPT = """
105106
The user may add a note to focus on a specific part of the image
106107
and an updated picture with the area of interest marked with a red rectangle.
107108
In this case, answer the question ONLY about the contents

0 commit comments

Comments (0)