generated from nhsengland/analyticsunit-template
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added generative source files with a full pipeline example
- Loading branch information
1 parent
434be0b
commit 236c7f4
Showing
5 changed files
with
261 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import sys\n", | ||
"\n", | ||
"path_root = os.path.dirname(os.getcwd())\n", | ||
"\n", | ||
"if path_root not in sys.path:\n", | ||
" sys.path.append(path_root)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import src.generate.synthea as synthea\n", | ||
"import src.generate.llm as llm" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"synthea.run(\"./run_synthea\", \"-p\", \"10\", \"West Yorkshire\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"model = \"llama2\"\n", | ||
"template = \"\"\"[INST]\n", | ||
"<<SYS>>\n", | ||
"You are a medical student answering an exam question about writing clinical notes for patients.\n", | ||
"<</SYS>>\n", | ||
"\n", | ||
"Keep in mind that your answer will be assessed based on incorporating all the provided information and the quality of prose.\n", | |
"\n", | ||
"1. Use prose to write an example clinical note for this patient's doctor.\n", | ||
"2. Use less than three sentences.\n", | ||
"3. Do not provide a diagnosis or recommendations.\n", | ||
"4. Use the following information:\n", | ||
"\n", | ||
"{data}\n", | ||
"[/INST]\n", | ||
"\"\"\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Clinical Note:\n", | ||
"Patient Dia Spencer, age 23, presents with acute viral pharyngitis. Symptoms include sore throat, fever, and difficulty swallowing. Patient's NHS number is 0427153913.Clinical Note:\n", | ||
"Patient Presentation: Charley Robel, a 12-year-old male, presents with acute viral pharyngitis. The patient's NHS number is 7864995101 and date of birth is March 4th, 2008. Given name is Charley and family name is Robel.\n", | ||
"\n", | ||
"Symptoms: Fever, sore throat, and difficulty swallowing.\n", | ||
"\n", | ||
"Medical History: No significant medical history.\n", | ||
"\n", | ||
"Plan: Observation and close monitoring of symptoms for the next 7-10 days. Antiviral medication may be prescribed if symptoms persist or worsen.Clinical Note:\n", | ||
"Mr. Jackson Schmeler presents today with a two-day history of worsening itching and inflammation on his face, particularly around his eyes and mouth. He reports that the symptoms have been gradually increasing in severity over the past week. His birthdate is May 6th, 2005, and he has been experiencing this condition for a few months now.Clinical Note:\n", | ||
"\n", | ||
"Patient Name: Shaunta Kuhic\n", | ||
"NHS Number: 7925078782\n", | ||
"Date of Birth: April 5, 1997\n", | ||
"\n", | ||
"Presentation: The patient presents with acute viral pharyngitis. She has experienced a sore throat for the past 3 days and has difficulty swallowing. Fever is present but not severe.Clinical Note:\n", | ||
"Patient Hildegard Ratke, NHS number 1520958625, presents for routine prenatal care at 20 weeks gestation. Born on January 20, 1967, she is in her 23rd week of pregnancy. No medical concerns or complications have been identified at this time.Clinical Note:\n", | ||
"Mr. Hermiston presents today with symptoms of viral sinusitis, including nasal congestion, facial pain, and postnasal drip. He reports a gradual onset of these symptoms over the past week, which have significantly impacted his quality of life. His medication list includes decongestants and antihistamines, but he has not experienced significant relief.Clinical Note for George Hartmann:\n", | ||
"\n", | ||
"Mr. Hartmann presents today with a seizure disorder. He was born on July 20, 1952, and his NHS number is 5183634092. He has been experiencing recurring seizures over the past year, with the most recent episode occurring yesterday evening. Further evaluation is necessary to determine the underlying cause of these seizures and develop an appropriate treatment plan.Clinical Note:\n", | ||
"Mr. Walker presents today with a 3-day history of right-sided otalgia and fever. He was seen in the emergency department last night and was prescribed antibiotics, which he has not yet started. He reports no recent travel or exposure to illness. His date of birth is March 25, 1956, and his NHS number is 7448453998.Clinical Note:\n", | ||
"Patient Wonda Bernhard presents with symptoms of acute viral pharyngitis. She is an 82-year-old female who was born on August 6, 1939. Her NHS number is 8491122729. Patient reports sore throat, fever, and difficulty swallowing." | ||
] | ||
} | ||
], | ||
"source": [ | ||
"llm.run(model, template)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": ".venv", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.7" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Paths into the Synthea checkout and the CSV files it exports.
path_synthea = "../../synthea"
path_csv = f"{path_synthea}/output/csv"
path_patients = f"{path_csv}/patients.csv"
path_encounters = f"{path_csv}/encounters.csv"

# Destination of the JSON produced from the Synthea CSVs.
path_synthea_output = "../data/synthea.json"

# Columns read from patients.csv and encounters.csv respectively.
cols_patients = ["Id", "BIRTHDATE", "FIRST", "LAST"]
cols_encounters = ["PATIENT", "ENCOUNTERCLASS", "REASONDESCRIPTION"]

# Mapping of joined-table column name -> name used in the exported records.
# The insertion order is also the column order of the exported table.
cols = {
    "NHS_NUMBER": "NHS_NUMBER",
    "BIRTHDATE": "DATE_OF_BIRTH",
    "FIRST": "GIVEN_NAME",
    "LAST": "FAMILY_NAME",
    "REASONDESCRIPTION": "DIAGNOSIS",
}

# Destination of the JSON produced by the LLM step.
path_llm_output = "../data/llm.json"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
from .config import path_synthea_output, path_llm_output | ||
from .utils import load_synthea_output, save | ||
from langchain.callbacks.manager import CallbackManager | ||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | ||
from langchain.llms import Ollama | ||
from langchain.prompts import PromptTemplate | ||
import json | ||
|
||
|
||
def run(model, template):
    """Generate one LLM completion per Synthea record and save them as JSON.

    Args:
        model: Model name handed to ``Ollama``.
        template: Prompt template text; each input record is a dict with a
            single ``"data"`` key, so the template is expected to contain a
            ``{data}`` placeholder.
    """
    prompt_inputs = load_synthea_output(path_synthea_output)

    # Tokens are streamed to stdout while the model generates.
    streaming = CallbackManager([StreamingStdOutCallbackHandler()])
    ollama = Ollama(model=model, callback_manager=streaming)

    pipeline = PromptTemplate.from_template(template) | ollama

    completions = pipeline.batch(prompt_inputs)
    save(path_llm_output, json.dumps(completions))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from .config import path_synthea_output | ||
from .utils import run_subprocess, csvs_to_df, df_to_json, save | ||
|
||
|
||
def run(*commands):
    """Run Synthea, then export the joined patient table as JSON.

    Args:
        *commands: Program and arguments for the Synthea invocation; they are
            forwarded as one tuple to ``run_subprocess``.
    """
    run_subprocess(commands)

    # Read the freshly written CSVs, serialise them and persist the result.
    serialised = df_to_json(csvs_to_df())
    save(path_synthea_output, serialised)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
from .config import ( | ||
path_synthea, | ||
path_patients, | ||
path_encounters, | ||
cols_patients, | ||
cols_encounters, | ||
cols, | ||
) | ||
|
||
import os | ||
import subprocess | ||
import nhs_number | ||
import pandas as pd | ||
import json | ||
|
||
|
||
def run_subprocess(commands):
    """Run a command with the Synthea directory as the working directory.

    The process-wide working directory is switched to ``path_synthea`` for the
    duration of the call and is always switched back afterwards, including
    when the command cannot be started.

    Args:
        commands: Sequence of program arguments passed to ``subprocess.run``.

    Raises:
        OSError: If ``path_synthea`` cannot be entered or the program cannot
            be launched.

    Note:
        The child's stdout and stderr are discarded and its exit status is
        not checked.
    """
    previous_cwd = os.getcwd()
    os.chdir(path_synthea)

    try:
        subprocess.run(
            commands, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT
        )
    finally:
        # Restore the caller's working directory even if the launch failed;
        # otherwise later relative paths would resolve inside path_synthea.
        os.chdir(previous_cwd)
|
||
|
||
def append_nhs_numbers(df_input):
    """Return a copy of ``df_input`` with a generated ``NHS_NUMBER`` column.

    One number is requested from ``nhs_number.generate`` per input row. The
    input frame itself is left unmodified.
    """
    generated = nhs_number.generate(quantity=len(df_input))

    df_output = df_input.copy()
    df_output["NHS_NUMBER"] = generated

    return df_output
|
||
|
||
def preprocess_patients(df_input):
    """Prepare the patients table; currently this only adds NHS numbers."""
    return append_nhs_numbers(df_input)
|
||
|
||
def preprocess_encounters(df_input):
    """Reduce the encounters table to one reasoned encounter per patient.

    Rows are kept when ``ENCOUNTERCLASS`` is not ``"wellness"`` and
    ``REASONDESCRIPTION`` is present; of the remaining rows only the first
    one per ``PATIENT`` survives. The input frame is not modified.
    """
    not_wellness = df_input["ENCOUNTERCLASS"].ne("wellness")
    has_reason = df_input["REASONDESCRIPTION"].notna()

    candidates = df_input.loc[not_wellness & has_reason]

    return candidates.drop_duplicates(subset="PATIENT")
|
||
|
||
def csvs_to_df():
    """Build the exported patient table from the Synthea CSV files.

    Reads the configured patient and encounter CSVs, preprocesses each, joins
    the encounters onto the patients by patient id and returns the columns
    named in ``cols``, renamed to their export names.

    ``DataFrame.join`` defaults to a left join, so patients without a
    retained encounter stay in the result with a missing diagnosis.
    """
    raw_patients = pd.read_csv(path_patients)[cols_patients]
    raw_encounters = pd.read_csv(path_encounters)[cols_encounters]

    patients = preprocess_patients(raw_patients)
    encounters = preprocess_encounters(raw_encounters)

    # Encounters are keyed by PATIENT, which matches the patients' Id column.
    encounters_by_patient = encounters.set_index("PATIENT")
    joined = patients.join(encounters_by_patient, on="Id")

    selected = joined[cols.keys()]
    renamed = selected.rename(columns=cols)

    return renamed[cols.values()]
|
||
|
||
def df_to_json(df_input):
    """Serialise a DataFrame to a JSON array with one object per row.

    Args:
        df_input: Frame to serialise; column names become the object keys.

    Returns:
        A JSON string holding a list of row objects, ``"[]"`` for an empty
        frame.
    """
    # orient="records" converts column by column, so every value keeps its
    # own column's type. Extracting rows with .iloc[i] instead coerces each
    # row to a single dtype (e.g. integers become floats next to a float
    # column).
    records = df_input.to_dict(orient="records")

    return json.dumps(records)
|
||
|
||
def save(path, data):
    """Write text to ``path``, creating missing parent directories first.

    Args:
        path: Destination file path; an existing file is overwritten.
        data: Text to write.
    """
    directory = os.path.dirname(path)

    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(path, "w") as f:
        f.write(data)
|
||
|
||
def load_synthea_output(path):
    """Load the saved Synthea records as a batch of prompt inputs.

    Args:
        path: JSON file containing a list of records.

    Returns:
        A list with one ``{"data": <record re-serialised as JSON>}`` dict per
        record, in file order.
    """
    with open(path) as file:
        records = json.load(file)

    return [{"data": json.dumps(record)} for record in records]