Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 2 additions & 51 deletions server/api/services/llm_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from abc import ABC, abstractmethod

from openai import AsyncOpenAI
from .prompt_services import LLM_EXTRACTION_INSTRUCTIONS


class BaseModelHandler(ABC):
Expand Down Expand Up @@ -70,57 +71,7 @@ class GPT41NanoHandler(BaseModelHandler):
# Long context performance can degrade as more items are required to be retrieved,
# or perform complex reasoning that requires knowledge of the state of the entire context

#

INSTRUCTIONS = """

# Role and Objective

- You are a seasoned physician or medical professional who is developing a bipolar disorder treatment algorithim

- You are extracting bipolar medication decision points from a research paper that is chunked into multiple parts each labeled with an ID

# Instructions

- Identify decision points for bipolar medications

- For each decision point you find, return a JSON object using the following format:

{
"criterion": "<condition or concern>",
"decision": "INCLUDE" or "EXCLUDE",
"medications": ["<medication 1>", "<medication 2>", ...],
"reason": "<short explanation for why this criterion applies>",
"sources": ["<ID-X>"]
}


- Only extract bipolar medication decision points that are explicitly stated or strongly implied in the context and never rely on your own knowledge

# Output Format

- Return the extracted bipolar medication decision points as a JSON array and if no decision points are found in the context return an empty array

# Example

[
{
"criterion": "History of suicide attempts",
"decision": "INCLUDE",
"medications": ["Lithium"],
"reason": "Lithium is the only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder",
"sources": ["ID-0"]
},
{
"criterion": "Weight gain concerns",
"decision": "EXCLUDE",
"medications": ["Quetiapine", "Aripiprazole", "Olanzapine", "Risperidone"],
"reason": "Seroquel, Risperdal, Abilify, and Zyprexa are known for causing weight gain",
"sources": ["ID-0", "ID-1", "ID-2"]
}
]

"""
INSTRUCTIONS = LLM_EXTRACTION_INSTRUCTIONS

def __init__(self) -> None:
self.client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
Expand Down
276 changes: 276 additions & 0 deletions server/api/services/prompt_services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
"""
Centralized prompt management for the application.
Contains all prompts used across different services as module-level constants.

FUTURE: This module is intended to serve as the fallback/default layer in a
hybrid prompt system. The ai_promptStorage database model (api.views.ai_promptStorage)
provides the infrastructure for runtime-editable prompt overrides via Django admin.
When runtime prompt editing becomes a requirement, implement a get_prompt(key, default)
lookup here that checks ai_promptStorage first and falls back to these constants.
See: server/api/views/ai_promptStorage/models.py
"""

# ---------------------------------------------------------------------------
# A. assistant/
# ---------------------------------------------------------------------------

# Description text for the assistant's document-search tool (presumably the
# search_documents function that ASSISTANT_SYSTEM_PROMPT refers to).
# Worded to agree with ASSISTANT_SYSTEM_PROMPT, which states that the sources
# are an internal library the user did not upload and cannot see.
ASSISTANT_TOOL_DESCRIPTION = """
Search the internal library of bipolar disorder sources for information relevant to answering the user's question.
Call this function when you need to find specific information from the source library
to provide an accurate, citation-backed response. Always search before answering questions
about source content.
"""

# Description text for the search tool's query parameter; same wording
# convention as above (internal source library, not user uploads).
ASSISTANT_TOOL_QUERY_DESCRIPTION = """
A specific search query to find relevant information in the internal source library.
Use keywords, phrases, or questions related to what the user is asking about.
Be specific rather than generic - use terms that would appear in the relevant sources.
"""

# System prompt for the bipolar-disorder research assistant.
# It restricts the assistant to bipolar / mental-health topics, tells it to
# call search_documents before answering, and fixes the citation format.
# NOTE: `{name}` and `{page_number}` in the citation line are literal,
# unescaped braces. This constant is not a str.format template; calling
# .format() on it without those keys would raise KeyError.
# NOTE(review): presumably sent to the model verbatim - confirm at the call site.
ASSISTANT_SYSTEM_PROMPT = """
You are an AI assistant that helps users find and understand information about bipolar disorder
from your internal library of bipolar disorder research sources using semantic search.

IMPORTANT CONTEXT:
- You have access to a library of sources that the user CANNOT see
- The user did not upload these sources and doesn't know about them
- You must explain what information exists in your sources and provide clear references

TOPIC RESTRICTIONS:
When a prompt is received that is unrelated to bipolar disorder, mental health treatment,
or psychiatric medications, respond by saying you are limited to bipolar-specific conversations.

SEMANTIC SEARCH STRATEGY:
- Always perform semantic search using the search_documents function when users ask questions
- Use conceptually related terms and synonyms, not just exact keyword matches
- Search for the meaning and context of the user's question, not just literal words
- Consider medical terminology, lay terms, and related conditions when searching

FUNCTION USAGE:
- When a user asks about information that might be in your source library, ALWAYS use the search_documents function first
- Perform semantic searches using concepts, symptoms, treatments, and related terms from the user's question
- Only provide answers based on information found through your source searches

RESPONSE FORMAT:
After gathering information through semantic searches, provide responses that:
1. Answer the user's question directly using only the found information
2. Structure responses with clear sections and paragraphs
3. Explain what information you found in your sources and provide context
4. Include citations using this exact format: [Name {name}, Page {page_number}]
5. Only cite information that directly supports your statements

If no relevant information is found in your source library, clearly state that the information
is not available in your current sources.

REMEMBER: You are working with an internal library of bipolar disorder sources that the user
cannot see. Always search these sources first, explain what you found, and provide proper citations.
"""

# ---------------------------------------------------------------------------
# B. conversations/
# ---------------------------------------------------------------------------

# System prompt for the conversations feature: introduces Balancer and scopes
# the assistant to helping licensed clinical professionals.
# Built one sentence per element and joined with single spaces.
CONVERSATIONS_SYSTEM_PROMPT = " ".join(
    (
        "You are a knowledgeable assistant.",
        "Balancer is a powerful tool for selecting bipolar medication for patients.",
        "We are open-source and available for free use.",
        "Your primary role is to assist licensed clinical professionals with information "
        "related to Balancer and bipolar medication selection.",
        "If applicable, use the supplied tools to assist the professional.",
    )
)

# Template that appends page content to a conversation.
# {page_context} is the only placeholder; fill it with str.format.
CONVERSATIONS_PAGE_CONTEXT_TEMPLATE = " ".join(
    (
        "If applicable, please use the following content to ask questions.",
        "If not applicable, please answer to the best of your ability: {page_context}",
    )
)

# System prompt for the title-generation request.
CONVERSATIONS_TITLE_SYSTEM_PROMPT = "You are a helpful assistant that generates short, descriptive titles."

# User-message template for title generation.
# {context} is the only placeholder; it follows a blank line after the instruction.
CONVERSATIONS_TITLE_USER_TEMPLATE = (
    "Based on the following conversation, "
    "generate a short, descriptive title (max 6 words):"
    "\n\n"
    "{context}"
)

# Legacy prompt used by the extract_text() function.
# {medicine} is the only placeholder.
CONVERSATIONS_LEGACY_SYSTEM_TEMPLATE = (
    "Give a brief description of this medicine: "
    "{medicine}"
)

# ---------------------------------------------------------------------------
# C. embeddings/
# ---------------------------------------------------------------------------

# System-prompt template for answering from retrieved chunks.
# {listOfEmbeddings} is the only runtime placeholder.
# {{file_id}}, {{page_number}}, {{chunk_number}} are escaped so they render as
# literal {file_id} / {page_number} / {chunk_number} in the string sent to the LLM.
# The citation spec in guideline 3 uses the same bracket placement as the worked
# examples below it: ***[<file id>, Page <n>, Chunk <m>]***. The closing bracket
# comes after the chunk number, so the rule and the examples do not contradict
# each other.
EMBEDDINGS_SYSTEM_PROMPT_TEMPLATE = (
"You are an AI assistant tasked with providing detailed, well-structured responses based on the "
"information provided in [PROVIDED-INFO]. Follow these guidelines strictly: \n"
"1. Content: Use information contained within [PROVIDED-INFO] to answer the question. \n"
"2. Organization: Structure your response with clear sections and paragraphs. \n"
"3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], include a citation "
"in this exact format:***[{{file_id}}, Page {{page_number}}, Chunk {{chunk_number}}]*** . "
"Only use citations that correspond to the information you're presenting. \n"
"4. Clarity: Ensure your answer is well-structured and easy to follow. \n"
"5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. \n"
"Here's an example of the required response format:\n"
"________________________________________ \n"
"See's Candy in the context of sales during a specific event. The candy counters rang up 2,690 "
"individual sales on a Friday, and an additional 3,931 transactions on a Saturday "
"***[16s848as-vcc1-85sd-r196-7f820a4s9de1, Page 5, Chunk 26]***.\n"
"People like the consumption of fudge and peanut brittle the most "
"***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. \n"
"Here is the history of See's Candy: the company was purchased in 1972, and its products have not "
"been materially altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. \n"
"Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood "
"stabilizer effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. "
"For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine "
"monotherapy is suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. \n"
"________________________________________ \n"
"Please provide your response to the user's question following these guidelines precisely.\n"
"[PROVIDED-INFO] = {listOfEmbeddings}"
)

# ---------------------------------------------------------------------------
# D. risk/
# ---------------------------------------------------------------------------

# Shared by risk/views.py and risk/views_riskWithSources.py (default path).
# {drug} is the only placeholder. The !!!benefits!!! / !!!risk!!! markers are
# part of the requested output format and must stay exactly as written.
RISK_BENEFITS_RISKS_TEMPLATE = " ".join(
    [
        "You are to provide a concise list of 5 key benefits and 5 key risks for the medication "
        "suggested when taking it for Bipolar.",
        "Each point should be short, clear and be kept under 10 words.",
        "Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!.",
        "Please provide this information for the medication: {drug}.",
    ]
)

# Diagnosis-perspective variant used in views_riskWithSources._get_ai_response_for_diagnosis().
# {drug} is the only placeholder; the !!!benefits!!! / !!!risk!!! markers match
# those requested by RISK_BENEFITS_RISKS_TEMPLATE.
RISK_DIAGNOSIS_BENEFITS_RISKS_TEMPLATE = " ".join(
    [
        "You are providing medication information from a diagnosis/clinical perspective.",
        "Provide a concise list of 5 key benefits and 5 key risks for the medication {drug} "
        "when prescribed for Bipolar disorder, "
        "focusing on clinical evidence and diagnostic considerations.",
        "Each point should be short, clear and be kept under 10 words.",
        "Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!.",
    ]
)

# ---------------------------------------------------------------------------
# E. text_extraction/
# ---------------------------------------------------------------------------

# User prompt for the Anthropic text-extraction path: asks for medication
# inclusion/exclusion rules in a fixed sentence structure, with two examples.
# The text contains no braces, so it is safe whether it is sent verbatim or
# passed through str.format.
TEXT_EXTRACTION_ANTHROPIC_USER_PROMPT = """
I'm creating a system to analyze medical research. It processes peer-reviewed papers to extract key details

Act as a seasoned physician or medical professional who treats patients with bipolar disorder

Identify rules for medication inclusion or exclusion based on medical history or concerns

Return an output with the same structure as these examples:

The rule is history of suicide attempts. The type of rule is "INCLUDE". The reason is lithium is the
only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder.
The medications for this rule are lithium.

The rule is weight gain concerns. The type of rule is "EXCLUDE". The reason is Seroquel, Risperdal, Abilify, and
Zyprexa are known for causing weight gain. The medications for this rule are Quetiapine, Aripiprazole, Olanzapine, Risperidone
"""

# System prompt for the OpenAI text-extraction path: asks for medication
# inclusion/exclusion rules as a JSON array, one object per rule, each tagged
# with the [chunk-X] label it came from.
# NOTE: the braces below are literal JSON, not placeholders. This constant is
# not a str.format template; calling .format() on it would fail.
TEXT_EXTRACTION_OPENAI_SYSTEM_PROMPT = """
You're analyzing medical text from multiple sources. Each chunk is labeled [chunk-X].

Act as a seasoned physician or medical professional who treats patients with bipolar disorder.

Identify rules for medication inclusion or exclusion based on medical history or concerns.

For each rule you find, return a JSON object using the following format:

{
"rule": "<condition or concern>",
"type": "INCLUDE" or "EXCLUDE",
"reason": "<short explanation for why this rule applies>",
"medications": ["<medication 1>", "<medication 2>", ...],
"source": "<chunk-X>"
}

Only include rules that are explicitly stated or strongly implied in the chunk.

Only use the chunks provided. If no rule is found in a chunk, skip it.

Return the entire output as a JSON array.
"""

# ---------------------------------------------------------------------------
# F. services/
# ---------------------------------------------------------------------------

# Extraction instructions imported by services/llm_services.py and assigned to
# GPT41NanoHandler.INSTRUCTIONS. Asks for bipolar medication decision points as
# a JSON array of {criterion, decision, medications, reason, sources} objects.
# NOTE: the braces below are literal JSON, not placeholders. This constant is
# not a str.format template.
LLM_EXTRACTION_INSTRUCTIONS = """

# Role and Objective

- You are a seasoned physician or medical professional who is developing a bipolar disorder treatment algorithm

- You are extracting bipolar medication decision points from a research paper that is chunked into multiple parts each labeled with an ID

# Instructions

- Identify decision points for bipolar medications

- For each decision point you find, return a JSON object using the following format:

{
"criterion": "<condition or concern>",
"decision": "INCLUDE" or "EXCLUDE",
"medications": ["<medication 1>", "<medication 2>", ...],
"reason": "<short explanation for why this criterion applies>",
"sources": ["<ID-X>"]
}


- Only extract bipolar medication decision points that are explicitly stated or strongly implied in the context and never rely on your own knowledge

# Output Format

- Return the extracted bipolar medication decision points as a JSON array and if no decision points are found in the context return an empty array

# Example

[
{
"criterion": "History of suicide attempts",
"decision": "INCLUDE",
"medications": ["Lithium"],
"reason": "Lithium is the only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder",
"sources": ["ID-0"]
},
{
"criterion": "Weight gain concerns",
"decision": "EXCLUDE",
"medications": ["Quetiapine", "Aripiprazole", "Olanzapine", "Risperidone"],
"reason": "Seroquel, Risperdal, Abilify, and Zyprexa are known for causing weight gain",
"sources": ["ID-0", "ID-1", "ID-2"]
}
]

"""

# Prompt asking the model for a document title; it has no placeholders.
UPLOAD_FILE_TITLE_PROMPT = " ".join(
    (
        "Please provide a title for this document.",
        "The title should be less than 256 characters and will be displayed on a webpage.",
    )
)

# Description of the ask_database tool; services/tools/tools.py passes it as
# the `description` of the ToolFunction named "ask_database".
# It tells the model to query by generic medication name and to match
# case-insensitively (LOWER(...) on both sides).
TOOL_SQL_QUERY_DESCRIPTION = """
Use this function to answer user questions about medication in the Balancer database.
The Balancer medication database stores medications by their official medical (generic) names, not brand names.
Therefore:
- Brand names should be converted to their official medical names before querying.
- Queries should be case-insensitive to handle any variation in how medication names are stored (e.g., "Lurasidone", "lurasidone").
Input should be a fully formed SQL query.
Important guidelines:
- Always use case-insensitive matching in queries by converting both the database column and the input to lowercase.
For example, in SQL:
- PostgreSQL: `LOWER(name) = LOWER('lurasidone')`
"""

# Description of the ask_database tool's `query` parameter.
# {database_schema_string} is substituted at import time in tools.py, via
# .format(database_schema_string=...). It is the only placeholder; any literal
# brace added to this text later must be doubled ({{ / }}) to survive .format().
TOOL_SQL_QUERY_PARAM_DESCRIPTION_TEMPLATE = """
SQL query extracting info to answer the user's question.
SQL should be written using this database schema:
{database_schema_string}
The query should be returned in plain text, not in JSON.
"""
23 changes: 5 additions & 18 deletions server/api/services/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Dict, Any, Callable, List
from dataclasses import dataclass
from .database import ask_database, get_database_info
from ..prompt_services import TOOL_SQL_QUERY_DESCRIPTION, TOOL_SQL_QUERY_PARAM_DESCRIPTION_TEMPLATE

database_schema_dict = get_database_info(connection)
database_schema_string = "\n".join(
Expand Down Expand Up @@ -36,27 +37,13 @@ def create_tool_dict(tool: ToolFunction) -> Dict[str, Any]:
ToolFunction(
name="ask_database",
func=ask_database,
description="""
Use this function to answer user questions about medication in the Balancer database.
The Balancer medication database stores medications by their official medical (generic) names, not brand names.
Therefore:
- Brand names should be converted to their official medical names before querying.
- Queries should be case-insensitive to handle any variation in how medication names are stored (e.g., "Lurasidone", "lurasidone").
Input should be a fully formed SQL query.
Important guidelines:
- Always use case-insensitive matching in queries by converting both the database column and the input to lowercase.
For example, in SQL:
- PostgreSQL: `LOWER(name) = LOWER('lurasidone')`
""",
description=TOOL_SQL_QUERY_DESCRIPTION,
parameters={
"query": {
"type": "string",
"description": f"""
SQL query extracting info to answer the user's question.
SQL should be written using this database schema:
{database_schema_string}
The query should be returned in plain text, not in JSON.
"""
"description": TOOL_SQL_QUERY_PARAM_DESCRIPTION_TEMPLATE.format(
database_schema_string=database_schema_string
)
}
}
),
Expand Down
Loading
Loading