From 43a44964a6b690a08346afce5e8944341b61c9a6 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Wed, 11 Mar 2026 12:57:02 -0400 Subject: [PATCH 1/2] Add prompt_services.py file --- server/api/services/prompt_services.py | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 server/api/services/prompt_services.py diff --git a/server/api/services/prompt_services.py b/server/api/services/prompt_services.py new file mode 100644 index 00000000..3da3ec47 --- /dev/null +++ b/server/api/services/prompt_services.py @@ -0,0 +1,4 @@ +""" +Centralized prompt management for the application. +Contains all prompts used across different services. +""" \ No newline at end of file From 8de92c9d3ee3376ed6b8b2e35d81eabc6d5d128c Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Wed, 11 Mar 2026 15:13:36 -0400 Subject: [PATCH 2/2] Centralize all model prompts in prompt_services.py --- server/api/services/llm_services.py | 53 +--- server/api/services/prompt_services.py | 276 +++++++++++++++++- server/api/services/tools/tools.py | 23 +- server/api/views/ai_promptStorage/models.py | 15 + server/api/views/assistant/views.py | 63 +--- server/api/views/conversations/views.py | 17 +- server/api/views/embeddings/embeddingsView.py | 19 +- server/api/views/risk/views.py | 3 +- .../api/views/risk/views_riskWithSources.py | 19 +- server/api/views/text_extraction/views.py | 50 +--- server/api/views/uploadFile/title.py | 4 +- 11 files changed, 334 insertions(+), 208 deletions(-) diff --git a/server/api/services/llm_services.py b/server/api/services/llm_services.py index 69df8172..e7f78e97 100644 --- a/server/api/services/llm_services.py +++ b/server/api/services/llm_services.py @@ -8,6 +8,7 @@ from abc import ABC, abstractmethod from openai import AsyncOpenAI +from .prompt_services import LLM_EXTRACTION_INSTRUCTIONS class BaseModelHandler(ABC): @@ -70,57 +71,7 @@ class GPT41NanoHandler(BaseModelHandler): # Long context performance can degrade as more items are required to be retrieved, # or 
perform complex reasoning that requires knowledge of the state of the entire context - # - - INSTRUCTIONS = """ - - # Role and Objective - - - You are a seasoned physician or medical professional who is developing a bipolar disorder treatment algorithim - - - You are extracting bipolar medication decision points from a research paper that is chunked into multiple parts each labeled with an ID - - # Instructions - - - Identify decision points for bipolar medications - - - For each decision point you find, return a JSON object using the following format: - - { - "criterion": "", - "decision": "INCLUDE" or "EXCLUDE", - "medications": ["", "", ...], - "reason": "", - "sources": [""] - } - - - - Only extract bipolar medication decision points that are explicitly stated or strongly implied in the context and never rely on your own knowledge - - # Output Format - - - Return the extracted bipolar medication decision points as a JSON array and if no decision points are found in the context return an empty array - - # Example - - [ - { - "criterion": "History of suicide attempts", - "decision": "INCLUDE", - "medications": ["Lithium"], - "reason": "Lithium is the only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder", - "sources": ["ID-0"] - }, - { - "criterion": "Weight gain concerns", - "decision": "EXCLUDE", - "medications": ["Quetiapine", "Aripiprazole", "Olanzapine", "Risperidone"], - "reason": "Seroquel, Risperdal, Abilify, and Zyprexa are known for causing weight gain", - "sources": ["ID-0", "ID-1", "ID-2"] - } - ] - - """ + INSTRUCTIONS = LLM_EXTRACTION_INSTRUCTIONS def __init__(self) -> None: self.client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY")) diff --git a/server/api/services/prompt_services.py b/server/api/services/prompt_services.py index 3da3ec47..df57c78f 100644 --- a/server/api/services/prompt_services.py +++ b/server/api/services/prompt_services.py @@ -1,4 +1,276 @@ """ Centralized prompt 
management for the application. -Contains all prompts used across different services. -""" \ No newline at end of file +Contains all prompts used across different services as module-level constants. + +FUTURE: This module is intended to serve as the fallback/default layer in a +hybrid prompt system. The ai_promptStorage database model (api.views.ai_promptStorage) +provides the infrastructure for runtime-editable prompt overrides via Django admin. +When runtime prompt editing becomes a requirement, implement a get_prompt(key, default) +lookup here that checks ai_promptStorage first and falls back to these constants. +See: server/api/views/ai_promptStorage/models.py +""" + +# --------------------------------------------------------------------------- +# A. assistant/ +# --------------------------------------------------------------------------- + +ASSISTANT_TOOL_DESCRIPTION = """ +Search the user's uploaded documents for information relevant to answering their question. +Call this function when you need to find specific information from the user's documents +to provide an accurate, citation-backed response. Always search before answering questions +about document content. +""" + +ASSISTANT_TOOL_QUERY_DESCRIPTION = """ +A specific search query to find relevant information in the user's documents. +Use keywords, phrases, or questions related to what the user is asking about. +Be specific rather than generic - use terms that would appear in the relevant documents. +""" + +ASSISTANT_SYSTEM_PROMPT = """ +You are an AI assistant that helps users find and understand information about bipolar disorder +from your internal library of bipolar disorder research sources using semantic search. 
+ +IMPORTANT CONTEXT: +- You have access to a library of sources that the user CANNOT see +- The user did not upload these sources and doesn't know about them +- You must explain what information exists in your sources and provide clear references + +TOPIC RESTRICTIONS: +When a prompt is received that is unrelated to bipolar disorder, mental health treatment, +or psychiatric medications, respond by saying you are limited to bipolar-specific conversations. + +SEMANTIC SEARCH STRATEGY: +- Always perform semantic search using the search_documents function when users ask questions +- Use conceptually related terms and synonyms, not just exact keyword matches +- Search for the meaning and context of the user's question, not just literal words +- Consider medical terminology, lay terms, and related conditions when searching + +FUNCTION USAGE: +- When a user asks about information that might be in your source library, ALWAYS use the search_documents function first +- Perform semantic searches using concepts, symptoms, treatments, and related terms from the user's question +- Only provide answers based on information found through your source searches + +RESPONSE FORMAT: +After gathering information through semantic searches, provide responses that: +1. Answer the user's question directly using only the found information +2. Structure responses with clear sections and paragraphs +3. Explain what information you found in your sources and provide context +4. Include citations using this exact format: [Name {name}, Page {page_number}] +5. Only cite information that directly supports your statements + +If no relevant information is found in your source library, clearly state that the information +is not available in your current sources. + +REMEMBER: You are working with an internal library of bipolar disorder sources that the user +cannot see. Always search these sources first, explain what you found, and provide proper citations. 
+""" + +# --------------------------------------------------------------------------- +# B. conversations/ +# --------------------------------------------------------------------------- + +CONVERSATIONS_SYSTEM_PROMPT = ( + "You are a knowledgeable assistant. Balancer is a powerful tool for selecting bipolar medication " + "for patients. We are open-source and available for free use. Your primary role is to assist " + "licensed clinical professionals with information related to Balancer and bipolar medication " + "selection. If applicable, use the supplied tools to assist the professional." +) + +CONVERSATIONS_PAGE_CONTEXT_TEMPLATE = ( + "If applicable, please use the following content to ask questions. " + "If not applicable, please answer to the best of your ability: {page_context}" +) + +CONVERSATIONS_TITLE_SYSTEM_PROMPT = ( + "You are a helpful assistant that generates short, descriptive titles." +) + +CONVERSATIONS_TITLE_USER_TEMPLATE = ( + "Based on the following conversation, generate a short, descriptive title (max 6 words):\n\n{context}" +) + +# Legacy prompt used by the extract_text() function. +CONVERSATIONS_LEGACY_SYSTEM_TEMPLATE = "Give a brief description of this medicine: {medicine}" + +# --------------------------------------------------------------------------- +# C. embeddings/ +# --------------------------------------------------------------------------- + +# {listOfEmbeddings} is the only runtime placeholder. +# {{file_id}}, {{page_number}}, {{chunk_number}} are escaped so they render as +# literal {file_id} / {page_number} / {chunk_number} in the string sent to the LLM. +EMBEDDINGS_SYSTEM_PROMPT_TEMPLATE = ( + "You are an AI assistant tasked with providing detailed, well-structured responses based on the " + "information provided in [PROVIDED-INFO]. Follow these guidelines strictly: \n" + "1. Content: Use information contained within [PROVIDED-INFO] to answer the question. \n" + "2. 
Organization: Structure your response with clear sections and paragraphs. \n" + "3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], include a citation " + "in this exact format:***[{{file_id}}], Page {{page_number}}, Chunk {{chunk_number}}*** . " + "Only use citations that correspond to the information you're presenting. \n" + "4. Clarity: Ensure your answer is well-structured and easy to follow. \n" + "5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. \n" + "Here's an example of the required response format:\n" + "________________________________________ \n" + "See's Candy in the context of sales during a specific event. The candy counters rang up 2,690 " + "individual sales on a Friday, and an additional 3,931 transactions on a Saturday " + "***[16s848as-vcc1-85sd-r196-7f820a4s9de1, Page 5, Chunk 26]***.\n" + "People like the consumption of fudge and peanut brittle the most " + "***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. \n" + "Here is the history of See's Candy: the company was purchased in 1972, and its products have not " + "been materially altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. \n" + "Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood " + "stabilizer effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. " + "For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine " + "monotherapy is suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. \n" + "________________________________________ \n" + "Please provide your response to the user's question following these guidelines precisely.\n" + "[PROVIDED-INFO] = {listOfEmbeddings}" +) + +# --------------------------------------------------------------------------- +# D. 
risk/ +# --------------------------------------------------------------------------- + +# Shared by risk/views.py and risk/views_riskWithSources.py (default path). +RISK_BENEFITS_RISKS_TEMPLATE = ( + "You are to provide a concise list of 5 key benefits and 5 key risks for the medication suggested " + "when taking it for Bipolar. Each point should be short, clear and be kept under 10 words. " + "Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!. " + "Please provide this information for the medication: {drug}." +) + +# Diagnosis-perspective variant used in views_riskWithSources._get_ai_response_for_diagnosis(). +RISK_DIAGNOSIS_BENEFITS_RISKS_TEMPLATE = ( + "You are providing medication information from a diagnosis/clinical perspective. " + "Provide a concise list of 5 key benefits and 5 key risks for the medication {drug} " + "when prescribed for Bipolar disorder, focusing on clinical evidence and diagnostic considerations. " + "Each point should be short, clear and be kept under 10 words. " + "Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!." +) + +# --------------------------------------------------------------------------- +# E. text_extraction/ +# --------------------------------------------------------------------------- + +TEXT_EXTRACTION_ANTHROPIC_USER_PROMPT = """ +I'm creating a system to analyze medical research. It processes peer-reviewed papers to extract key details + +Act as a seasoned physician or medical professional who treats patients with bipolar disorder + +Identify rules for medication inclusion or exclusion based on medical history or concerns + +Return an output with the same structure as these examples: + +The rule is history of suicide attempts. The type of rule is "INCLUDE". The reason is lithium is the +only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder. +The medications for this rule are lithium. 
+ +The rule is weight gain concerns. The type of rule is "EXCLUDE". The reason is Seroquel, Risperdal, Abilify, and +Zyprexa are known for causing weight gain. The medications for this rule are Quetiapine, Aripiprazole, Olanzapine, Risperidone +} +""" + +TEXT_EXTRACTION_OPENAI_SYSTEM_PROMPT = """ +You're analyzing medical text from multiple sources. Each chunk is labeled [chunk-X]. + +Act as a seasoned physician or medical professional who treats patients with bipolar disorder. + +Identify rules for medication inclusion or exclusion based on medical history or concerns. + +For each rule you find, return a JSON object using the following format: + +{ + "rule": "", + "type": "INCLUDE" or "EXCLUDE", + "reason": "", + "medications": ["", "", ...], + "source": "" +} + +Only include rules that are explicitly stated or strongly implied in the chunk. + +Only use the chunks provided. If no rule is found in a chunk, skip it. + +Return the entire output as a JSON array. +""" + +# --------------------------------------------------------------------------- +# F. 
services/ +# --------------------------------------------------------------------------- + +LLM_EXTRACTION_INSTRUCTIONS = """ + +# Role and Objective + +- You are a seasoned physician or medical professional who is developing a bipolar disorder treatment algorithm + +- You are extracting bipolar medication decision points from a research paper that is chunked into multiple parts each labeled with an ID + +# Instructions + +- Identify decision points for bipolar medications + +- For each decision point you find, return a JSON object using the following format: + + { + "criterion": "", + "decision": "INCLUDE" or "EXCLUDE", + "medications": ["", "", ...], + "reason": "", + "sources": [""] + } + + +- Only extract bipolar medication decision points that are explicitly stated or strongly implied in the context and never rely on your own knowledge + +# Output Format + +- Return the extracted bipolar medication decision points as a JSON array and if no decision points are found in the context return an empty array + +# Example + +[ + { + "criterion": "History of suicide attempts", + "decision": "INCLUDE", + "medications": ["Lithium"], + "reason": "Lithium is the only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder", + "sources": ["ID-0"] + }, + { + "criterion": "Weight gain concerns", + "decision": "EXCLUDE", + "medications": ["Quetiapine", "Aripiprazole", "Olanzapine", "Risperidone"], + "reason": "Seroquel, Risperdal, Abilify, and Zyprexa are known for causing weight gain", + "sources": ["ID-0", "ID-1", "ID-2"] + } +] + +""" + +UPLOAD_FILE_TITLE_PROMPT = ( + "Please provide a title for this document. " + "The title should be less than 256 characters and will be displayed on a webpage." +) + +TOOL_SQL_QUERY_DESCRIPTION = """ +Use this function to answer user questions about medication in the Balancer database. 
+The Balancer medication database stores medications by their official medical (generic) names, not brand names. +Therefore: +- Brand names should be converted to their official medical names before querying. +- Queries should be case-insensitive to handle any variation in how medication names are stored (e.g., "Lurasidone", "lurasidone"). +Input should be a fully formed SQL query. +Important guidelines: +- Always use case-insensitive matching in queries by converting both the database column and the input to lowercase. +For example, in SQL: +- PostgreSQL: `LOWER(name) = LOWER('lurasidone')` +""" + +# {database_schema_string} is substituted at import time in tools.py. +TOOL_SQL_QUERY_PARAM_DESCRIPTION_TEMPLATE = """ +SQL query extracting info to answer the user's question. +SQL should be written using this database schema: +{database_schema_string} +The query should be returned in plain text, not in JSON. +""" diff --git a/server/api/services/tools/tools.py b/server/api/services/tools/tools.py index f9fa14c8..6359e538 100644 --- a/server/api/services/tools/tools.py +++ b/server/api/services/tools/tools.py @@ -2,6 +2,7 @@ from typing import Dict, Any, Callable, List from dataclasses import dataclass from .database import ask_database, get_database_info +from ..prompt_services import TOOL_SQL_QUERY_DESCRIPTION, TOOL_SQL_QUERY_PARAM_DESCRIPTION_TEMPLATE database_schema_dict = get_database_info(connection) database_schema_string = "\n".join( @@ -36,27 +37,13 @@ def create_tool_dict(tool: ToolFunction) -> Dict[str, Any]: ToolFunction( name="ask_database", func=ask_database, - description=""" - Use this function to answer user questions about medication in the Balancer database. - The Balancer medication database stores medications by their official medical (generic) names, not brand names. - Therefore: - - Brand names should be converted to their official medical names before querying. 
- - Queries should be case-insensitive to handle any variation in how medication names are stored (e.g., "Lurasidone", "lurasidone"). - Input should be a fully formed SQL query. - Important guidelines: - - Always use case-insensitive matching in queries by converting both the database column and the input to lowercase. - For example, in SQL: - - PostgreSQL: `LOWER(name) = LOWER('lurasidone')` - """, + description=TOOL_SQL_QUERY_DESCRIPTION, parameters={ "query": { "type": "string", - "description": f""" - SQL query extracting info to answer the user's question. - SQL should be written using this database schema: - {database_schema_string} - The query should be returned in plain text, not in JSON. - """ + "description": TOOL_SQL_QUERY_PARAM_DESCRIPTION_TEMPLATE.format( + database_schema_string=database_schema_string + ) } } ), diff --git a/server/api/views/ai_promptStorage/models.py b/server/api/views/ai_promptStorage/models.py index 3e7bf467..8e2bf62e 100644 --- a/server/api/views/ai_promptStorage/models.py +++ b/server/api/views/ai_promptStorage/models.py @@ -10,6 +10,21 @@ class AI_PromptStorage(models.Model): + """ + Database-backed storage for AI prompt overrides. + + Currently unused at runtime — prompts are managed as code constants in + api.services.prompt_services. This model is intended to support runtime + prompt editing (without a code deploy) when that becomes a requirement. + + Intended future use: a get_prompt() function in prompt_services.py queries + this table first (filtered by Area and IsActive=True) and falls back to the + code constant if no active record is found. + + NOTE: Before activating runtime use, the store_prompt endpoint needs its + permission_classes restored (currently commented out in views.py). 
+ """ + guid = models.UUIDField(default=uuid.uuid4, editable=False, unique=True) PromptText = models.TextField() IsActive = models.BooleanField(default=True) diff --git a/server/api/views/assistant/views.py b/server/api/views/assistant/views.py index e3e8d6f7..6f39b246 100644 --- a/server/api/views/assistant/views.py +++ b/server/api/views/assistant/views.py @@ -17,6 +17,11 @@ from ...services.embedding_services import get_closest_embeddings from ...services.conversions_services import convert_uuids +from ...services.prompt_services import ( + ASSISTANT_TOOL_DESCRIPTION, + ASSISTANT_TOOL_QUERY_DESCRIPTION, + ASSISTANT_SYSTEM_PROMPT, +) # Configure logging logger = logging.getLogger(__name__) @@ -136,30 +141,17 @@ def post(self, request): client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) - TOOL_DESCRIPTION = """ - Search the user's uploaded documents for information relevant to answering their question. - Call this function when you need to find specific information from the user's documents - to provide an accurate, citation-backed response. Always search before answering questions - about document content. - """ - - TOOL_PROPERTY_DESCRIPTION = """ - A specific search query to find relevant information in the user's documents. - Use keywords, phrases, or questions related to what the user is asking about. - Be specific rather than generic - use terms that would appear in the relevant documents. - """ - tools = [ { "type": "function", "name": "search_documents", - "description": TOOL_DESCRIPTION, + "description": ASSISTANT_TOOL_DESCRIPTION, "parameters": { "type": "object", "properties": { "query": { "type": "string", - "description": TOOL_PROPERTY_DESCRIPTION, + "description": ASSISTANT_TOOL_QUERY_DESCRIPTION, } }, "required": ["query"], @@ -212,47 +204,8 @@ def search_documents(query: str, user=user) -> str: except Exception as e: return f"Error searching documents: {str(e)}. Please try again if the issue persists." 
- INSTRUCTIONS = """ - You are an AI assistant that helps users find and understand information about bipolar disorder - from your internal library of bipolar disorder research sources using semantic search. - - IMPORTANT CONTEXT: - - You have access to a library of sources that the user CANNOT see - - The user did not upload these sources and doesn't know about them - - You must explain what information exists in your sources and provide clear references - - TOPIC RESTRICTIONS: - When a prompt is received that is unrelated to bipolar disorder, mental health treatment, - or psychiatric medications, respond by saying you are limited to bipolar-specific conversations. - - SEMANTIC SEARCH STRATEGY: - - Always perform semantic search using the search_documents function when users ask questions - - Use conceptually related terms and synonyms, not just exact keyword matches - - Search for the meaning and context of the user's question, not just literal words - - Consider medical terminology, lay terms, and related conditions when searching - - FUNCTION USAGE: - - When a user asks about information that might be in your source library, ALWAYS use the search_documents function first - - Perform semantic searches using concepts, symptoms, treatments, and related terms from the user's question - - Only provide answers based on information found through your source searches - - RESPONSE FORMAT: - After gathering information through semantic searches, provide responses that: - 1. Answer the user's question directly using only the found information - 2. Structure responses with clear sections and paragraphs - 3. Explain what information you found in your sources and provide context - 4. Include citations using this exact format: [Name {name}, Page {page_number}] - 5. Only cite information that directly supports your statements - - If no relevant information is found in your source library, clearly state that the information - is not available in your current sources. 
- - REMEMBER: You are working with an internal library of bipolar disorder sources that the user - cannot see. Always search these sources first, explain what you found, and provide proper citations. - """ - MODEL_DEFAULTS = { - "instructions": INSTRUCTIONS, + "instructions": ASSISTANT_SYSTEM_PROMPT, "model": "gpt-5-nano", # 400,000 token context window # A summary of the reasoning performed by the model. This can be useful for debugging and understanding the model's reasoning process. "reasoning": {"effort": "low", "summary": None}, diff --git a/server/api/views/conversations/views.py b/server/api/views/conversations/views.py index de927cf1..385d2c43 100644 --- a/server/api/views/conversations/views.py +++ b/server/api/views/conversations/views.py @@ -16,6 +16,13 @@ from .models import Conversation, Message from .serializers import ConversationSerializer from ...services.tools.tools import tools, execute_tool +from ...services.prompt_services import ( + CONVERSATIONS_SYSTEM_PROMPT, + CONVERSATIONS_PAGE_CONTEXT_TEMPLATE, + CONVERSATIONS_TITLE_SYSTEM_PROMPT, + CONVERSATIONS_TITLE_USER_TEMPLATE, + CONVERSATIONS_LEGACY_SYSTEM_TEMPLATE, +) from drf_spectacular.utils import extend_schema, inline_serializer from rest_framework import serializers as drf_serializers @@ -49,7 +56,7 @@ def extract_text(request: str) -> JsonResponse: messages=[ { "role": "system", - "content": "Give a brief description of this medicine: %s" % tokens, + "content": CONVERSATIONS_LEGACY_SYSTEM_TEMPLATE.format(medicine=tokens), } ], max_tokens=500, @@ -171,11 +178,11 @@ def get_chatgpt_response(self, conversation, user_message, page_context=None): client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) messages = [{ "role": "system", - "content": "You are a knowledgeable assistant. Balancer is a powerful tool for selecting bipolar medication for patients. We are open-source and available for free use. 
Your primary role is to assist licensed clinical professionals with information related to Balancer and bipolar medication selection. If applicable, use the supplied tools to assist the professional." + "content": CONVERSATIONS_SYSTEM_PROMPT }] if page_context: - context_message = f"If applicable, please use the following content to ask questions. If not applicable, please answer to the best of your ability: {page_context}" + context_message = CONVERSATIONS_PAGE_CONTEXT_TEMPLATE.format(page_context=page_context) messages.append({"role": "system", "content": context_message}) for msg in conversation.messages.all(): role = "user" if msg.is_user else "assistant" @@ -240,13 +247,13 @@ def get_chatgpt_response(self, conversation, user_message, page_context=None): def generate_title(self, conversation): messages = conversation.messages.all()[:2] context = "\n".join([msg.content for msg in messages]) - prompt = f"Based on the following conversation, generate a short, descriptive title (max 6 words):\n\n{context}" + prompt = CONVERSATIONS_TITLE_USER_TEMPLATE.format(context=context) client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) response = client.chat.completions.create( model="gpt-3.5-turbo", messages=[ - {"role": "system", "content": "You are a helpful assistant that generates short, descriptive titles."}, + {"role": "system", "content": CONVERSATIONS_TITLE_SYSTEM_PROMPT}, {"role": "user", "content": prompt} ] ) diff --git a/server/api/views/embeddings/embeddingsView.py b/server/api/views/embeddings/embeddingsView.py index ebcf0774..2bb670cc 100644 --- a/server/api/views/embeddings/embeddingsView.py +++ b/server/api/views/embeddings/embeddingsView.py @@ -7,6 +7,7 @@ from ...services.embedding_services import get_closest_embeddings from ...services.conversions_services import convert_uuids from ...services.openai_services import openAIServices +from ...services.prompt_services import EMBEDDINGS_SYSTEM_PROMPT_TEMPLATE from django.utils.decorators import 
method_decorator from django.views.decorators.csrf import csrf_exempt import json @@ -57,23 +58,7 @@ def post(self, request, *args, **kwargs): listOfEmbeddings = " ".join(prompt_texts) - prompt_text = ( - f"""You are an AI assistant tasked with providing detailed, well-structured responses based on the information provided in [PROVIDED-INFO]. Follow these guidelines strictly: - 1. Content: Use information contained within [PROVIDED-INFO] to answer the question. - 2. Organization: Structure your response with clear sections and paragraphs. - 3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], include a citation in this exact format:***[{{file_id}}], Page {{page_number}}, Chunk {{chunk_number}}*** . Only use citations that correspond to the information you're presenting. - 4. Clarity: Ensure your answer is well-structured and easy to follow. - 5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. - Here's an example of the required response format: - ________________________________________ - See's Candy in the context of sales during a specific event. The candy counters rang up 2,690 individual sales on a Friday, and an additional 3,931 transactions on a Saturday ***[16s848as-vcc1-85sd-r196-7f820a4s9de1, Page 5, Chunk 26]***. - People like the consumption of fudge and peanut brittle the most ***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. - Here is the history of See's Candy: the company was purchased in 1972, and its products have not been materially altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. - Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood stabilizer effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. 
For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine monotherapy is suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. - ________________________________________ - Please provide your response to the user's question following these guidelines precisely. - [PROVIDED-INFO] = {listOfEmbeddings}""" - ) + prompt_text = EMBEDDINGS_SYSTEM_PROMPT_TEMPLATE.format(listOfEmbeddings=listOfEmbeddings) if stream: def stream_generator(): diff --git a/server/api/views/risk/views.py b/server/api/views/risk/views.py index 99327a8d..0a5b2dc8 100644 --- a/server/api/views/risk/views.py +++ b/server/api/views/risk/views.py @@ -3,6 +3,7 @@ import openai import json from api.views.listMeds.models import Medication +from api.services.prompt_services import RISK_BENEFITS_RISKS_TEMPLATE # XXX: remove csrf_exempt usage before production from django.views.decorators.csrf import csrf_exempt @@ -33,7 +34,7 @@ def medication(request): messages=[ { "role": "system", - "content": f"You are to provide a concise list of 5 key benefits and 5 key risks for the medication suggested when taking it for Bipolar. Each point should be short, clear and be kept under 10 words. Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!. Please provide this information for the medication: {diagnosis}." 
+ "content": RISK_BENEFITS_RISKS_TEMPLATE.format(drug=diagnosis) } ] ) diff --git a/server/api/views/risk/views_riskWithSources.py b/server/api/views/risk/views_riskWithSources.py index 26cad9f8..b24ada38 100644 --- a/server/api/views/risk/views_riskWithSources.py +++ b/server/api/views/risk/views_riskWithSources.py @@ -7,6 +7,10 @@ from api.models.model_medRule import MedRule, MedRuleSource import openai import os +from api.services.prompt_services import ( + RISK_BENEFITS_RISKS_TEMPLATE, + RISK_DIAGNOSIS_BENEFITS_RISKS_TEMPLATE, +) class RiskWithSourcesView(APIView): @@ -82,12 +86,7 @@ def post(self, request): }) except Medication.DoesNotExist: - prompt = ( - f"You are to provide a concise list of 5 key benefits and 5 key risks " - f"for the medication suggested when taking it for Bipolar. Each point should be short, " - f"clear and be kept under 10 words. Begin the benefits section with !!!benefits!!! and " - f"the risks section with !!!risk!!!. Please provide this information for the medication: {drug}." - ) + prompt = RISK_BENEFITS_RISKS_TEMPLATE.format(drug=drug) try: ai_response = openai.ChatCompletion.create( @@ -453,13 +452,7 @@ def _build_pdf_link(self, embedding): def _get_ai_response_for_diagnosis(self, drug): """Get AI response with diagnosis-specific context""" - prompt = ( - f"You are providing medication information from a diagnosis/clinical perspective. " - f"Provide a concise list of 5 key benefits and 5 key risks for the medication {drug} " - f"when prescribed for Bipolar disorder, focusing on clinical evidence and diagnostic considerations. " - f"Each point should be short, clear and be kept under 10 words. " - f"Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!." 
- ) + prompt = RISK_DIAGNOSIS_BENEFITS_RISKS_TEMPLATE.format(drug=drug) try: ai_response = openai.ChatCompletion.create( diff --git a/server/api/views/text_extraction/views.py b/server/api/views/text_extraction/views.py index 020740ad..1648f43a 100644 --- a/server/api/views/text_extraction/views.py +++ b/server/api/views/text_extraction/views.py @@ -14,24 +14,10 @@ from ...services.openai_services import openAIServices from api.models.model_embeddings import Embeddings - -USER_PROMPT = """ -I'm creating a system to analyze medical research. It processes peer-reviewed papers to extract key details - -Act as a seasoned physician or medical professional who treat patients with bipolar disorder - -Identify rules for medication inclusion or exclusion based on medical history or concerns - -Return an output with the same structure as these examples: - -The rule is history of suicide attempts. The type of rule is "INCLUDE". The reason is lithium is the -only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder. -The medications for this rule are lithium. - -The rule is weight gain concerns. The type of rule is "EXCLUDE". The reason is Seroquel, Risperdal, Abilify, and -Zyprexa are known for causing weight gain. 
The medications for this rule are Quetiapine, Aripiprazole, Olanzapine, Risperidone -} -""" +from ...services.prompt_services import ( + TEXT_EXTRACTION_ANTHROPIC_USER_PROMPT, + TEXT_EXTRACTION_OPENAI_SYSTEM_PROMPT, +) def anthropic_citations(client: anthropic.Client, user_prompt: str, content_chunks: list) -> tuple: @@ -125,7 +111,7 @@ def get(self, request): # TODO: Format into the Anthropic API"s expected input format in the anthropic_citations function chunks = [{"type": "text", "text": chunk.text} for chunk in query] - texts, cited_texts = anthropic_citations(client, USER_PROMPT, chunks) + texts, cited_texts = anthropic_citations(client, TEXT_EXTRACTION_ANTHROPIC_USER_PROMPT, chunks) return Response({"texts": texts, "cited_texts": cited_texts}, status=status.HTTP_200_OK) @@ -172,30 +158,6 @@ class RuleExtractionAPIOpenAIView(APIView): ) def get(self, request): try: - user_prompt = """ - You're analyzing medical text from multiple sources. Each chunk is labeled [chunk-X]. - - Act as a seasoned physician or medical professional who treats patients with bipolar disorder. - - Identify rules for medication inclusion or exclusion based on medical history or concerns. - - For each rule you find, return a JSON object using the following format: - - { - "rule": "", - "type": "INCLUDE" or "EXCLUDE", - "reason": "", - "medications": ["", "", ...], - "source": "" - } - - Only include rules that are explicitly stated or strongly implied in the chunk. - - Only use the chunks provided. If no rule is found in a chunk, skip it. - - Return the entire output as a JSON array. 
- """ - guid = request.query_params.get('guid') query = Embeddings.objects.filter(upload_file__guid=guid) chunks = [ @@ -203,7 +165,7 @@ def get(self, request): for i, chunk in enumerate(query) ] - output_text = openai_extraction(chunks, user_prompt) + output_text = openai_extraction(chunks, TEXT_EXTRACTION_OPENAI_SYSTEM_PROMPT) cleaned_text = re.sub(r"^```json|```$", "", output_text.strip()).strip() rules = json.loads(cleaned_text) diff --git a/server/api/views/uploadFile/title.py b/server/api/views/uploadFile/title.py index 17f52a74..d626d1c3 100644 --- a/server/api/views/uploadFile/title.py +++ b/server/api/views/uploadFile/title.py @@ -3,6 +3,7 @@ import fitz from api.services.openai_services import openAIServices +from api.services.prompt_services import UPLOAD_FILE_TITLE_PROMPT # regular expression to match common research white paper titles. Created by Chat-gpt @@ -55,9 +56,8 @@ def summarize_pdf(pdf: fitz.Document) -> str: raise Exception("Failed to read the first page of the PDF file") # UploadFile model title is limited to 255 chars. - prompt = "Please provide a title for this document. The title should be less than 256 characters and will be displayed on a webpage." response = openAIServices.openAI( - first_page_content, prompt, model='gpt-4o', temp=0.0) + first_page_content, UPLOAD_FILE_TITLE_PROMPT, model='gpt-4o', temp=0.0) title = response.choices[0].message.content.strip().strip('"').strip("'") # Truncate to fit UploadFile model's max_length=255 title field as a final safeguard return title[:255]