Skip to content

Commit

Permalink
Added generative source files with a full pipeline example
Browse files Browse the repository at this point in the history
  • Loading branch information
xiyaozhuang committed Mar 5, 2024
1 parent 434be0b commit 236c7f4
Show file tree
Hide file tree
Showing 5 changed files with 261 additions and 0 deletions.
121 changes: 121 additions & 0 deletions notebooks/full_pipeline_example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"\n",
"path_root = os.path.dirname(os.getcwd())\n",
"\n",
"if path_root not in sys.path:\n",
" sys.path.append(path_root)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import src.generate.synthea as synthea\n",
"import src.generate.llm as llm"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"synthea.run(\"./run_synthea\", \"-p\", \"10\", \"West Yorkshire\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = \"llama2\"\n",
"template = \"\"\"[INST]\n",
"<<SYS>>\n",
"You are a medical student answering an exam question about writing clinical notes for patients.\n",
"<</SYS>>\n",
"\n",
"Keep in mind that your answer will be asssessed based on incorporating all the provided information and the quality of prose.\n",
"\n",
"1. Use prose to write an example clinical note for this patient's doctor.\n",
"2. Use less than three sentences.\n",
"3. Do not provide a diagnosis or recommendations.\n",
"4. Use the following information:\n",
"\n",
"{data}\n",
"[/INST]\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Clinical Note:\n",
"Patient Dia Spencer, age 23, presents with acute viral pharyngitis. Symptoms include sore throat, fever, and difficulty swallowing. Patient's NHS number is 0427153913.Clinical Note:\n",
"Patient Presentation: Charley Robel, a 12-year-old male, presents with acute viral pharyngitis. The patient's NHS number is 7864995101 and date of birth is March 4th, 2008. Given name is Charley and family name is Robel.\n",
"\n",
"Symptoms: Fever, sore throat, and difficulty swallowing.\n",
"\n",
"Medical History: No significant medical history.\n",
"\n",
"Plan: Observation and close monitoring of symptoms for the next 7-10 days. Antiviral medication may be prescribed if symptoms persist or worsen.Clinical Note:\n",
"Mr. Jackson Schmeler presents today with a two-day history of worsening itching and inflammation on his face, particularly around his eyes and mouth. He reports that the symptoms have been gradually increasing in severity over the past week. His birthdate is May 6th, 2005, and he has been experiencing this condition for a few months now.Clinical Note:\n",
"\n",
"Patient Name: Shaunta Kuhic\n",
"NHS Number: 7925078782\n",
"Date of Birth: April 5, 1997\n",
"\n",
"Presentation: The patient presents with acute viral pharyngitis. She has experienced a sore throat for the past 3 days and has difficulty swallowing. Fever is present but not severe.Clinical Note:\n",
"Patient Hildegard Ratke, NHS number 1520958625, presents for routine prenatal care at 20 weeks gestation. Born on January 20, 1967, she is in her 23rd week of pregnancy. No medical concerns or complications have been identified at this time.Clinical Note:\n",
"Mr. Hermiston presents today with symptoms of viral sinusitis, including nasal congestion, facial pain, and postnasal drip. He reports a gradual onset of these symptoms over the past week, which have significantly impacted his quality of life. His medication list includes decongestants and antihistamines, but he has not experienced significant relief.Clinical Note for George Hartmann:\n",
"\n",
"Mr. Hartmann presents today with a seizure disorder. He was born on July 20, 1952, and his NHS number is 5183634092. He has been experiencing recurring seizures over the past year, with the most recent episode occurring yesterday evening. Further evaluation is necessary to determine the underlying cause of these seizures and develop an appropriate treatment plan.Clinical Note:\n",
"Mr. Walker presents today with a 3-day history of right-sided otalgia and fever. He was seen in the emergency department last night and was prescribed antibiotics, which he has not yet started. He reports no recent travel or exposure to illness. His date of birth is March 25, 1956, and his NHS number is 7448453998.Clinical Note:\n",
"Patient Wonda Bernhard presents with symptoms of acute viral pharyngitis. She is an 82-year-old female who was born on August 6, 1939. Her NHS number is 8491122729. Patient reports sore throat, fever, and difficulty swallowing."
]
}
],
"source": [
"llm.run(model, template)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
17 changes: 17 additions & 0 deletions src/generate/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Relative filesystem locations used by the generation pipeline.
# NOTE(review): being relative, these resolve against the process working
# directory at the time they are used — confirm the intended launch directory.
path_synthea = "../../synthea"
path_csv = f"{path_synthea}/output/csv"
path_patients = f"{path_csv}/patients.csv"
path_encounters = f"{path_csv}/encounters.csv"
path_synthea_output = "../data/synthea.json"
path_llm_output = "../data/llm.json"

# Columns read from each Synthea CSV export.
cols_patients = ["Id", "BIRTHDATE", "FIRST", "LAST"]
cols_encounters = ["PATIENT", "ENCOUNTERCLASS", "REASONDESCRIPTION"]

# Source column name -> output column name. Insertion order is significant:
# it is also used as the column order of the final table.
cols = dict(
    NHS_NUMBER="NHS_NUMBER",
    BIRTHDATE="DATE_OF_BIRTH",
    FIRST="GIVEN_NAME",
    LAST="FAMILY_NAME",
    REASONDESCRIPTION="DIAGNOSIS",
)
21 changes: 21 additions & 0 deletions src/generate/llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from .config import path_synthea_output, path_llm_output
from .utils import load_synthea_output, save
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import Ollama
from langchain.prompts import PromptTemplate
import json


def run(model, template):
    """Send every saved Synthea record through an Ollama model and save the results.

    Args:
        model: Model name passed to ``Ollama(model=...)``.
        template: Prompt template text passed to
            ``PromptTemplate.from_template``; each batch entry supplies a
            ``data`` value for it.

    The list returned by ``chain.batch`` is JSON-encoded and written to
    ``path_llm_output``.
    """
    inputs = load_synthea_output(path_synthea_output)

    # NOTE(review): the stdout handler presumably echoes tokens as they are
    # generated — confirm against the langchain callback documentation.
    ollama = Ollama(
        model=model,
        callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    )
    chain = PromptTemplate.from_template(template) | ollama

    save(path_llm_output, json.dumps(chain.batch(inputs)))
9 changes: 9 additions & 0 deletions src/generate/synthea.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .config import path_synthea_output
from .utils import run_subprocess, csvs_to_df, df_to_json, save


def run(*commands):
    """Run the given command, then convert its CSV output to JSON and save it.

    Args:
        *commands: The program and its arguments, forwarded as one tuple to
            ``run_subprocess``.

    The table built by ``csvs_to_df`` is serialised with ``df_to_json`` and
    written to ``path_synthea_output``.
    """
    run_subprocess(commands)
    save(path_synthea_output, df_to_json(csvs_to_df()))
93 changes: 93 additions & 0 deletions src/generate/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from .config import (
path_synthea,
path_patients,
path_encounters,
cols_patients,
cols_encounters,
cols,
)

import os
import subprocess
import nhs_number
import pandas as pd
import json


def run_subprocess(commands):
    """Run a command from inside the Synthea directory, discarding its output.

    The process working directory is switched to ``path_synthea`` for the
    duration of the call and is always switched back afterwards, including
    when the command cannot be launched.

    Args:
        commands: Sequence holding the program and its arguments, passed
            unchanged to ``subprocess.run``.

    Raises:
        OSError: If ``path_synthea`` cannot be entered or the program cannot
            be started (e.g. ``FileNotFoundError``).
    """
    previous_cwd = os.getcwd()
    os.chdir(path_synthea)

    try:
        # stderr is merged into stdout, which is itself thrown away; the exit
        # status is not inspected.
        subprocess.run(
            commands, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT
        )
    finally:
        # Without this, a failed launch would leave the whole process stranded
        # in the Synthea directory and break every later relative path.
        os.chdir(previous_cwd)


def append_nhs_numbers(df_input):
    """Return a copy of ``df_input`` with a generated ``NHS_NUMBER`` column.

    One value per row is requested from ``nhs_number.generate``; the input
    frame itself is not modified.
    """
    generated = nhs_number.generate(quantity=len(df_input))
    duplicate = df_input.copy()

    return duplicate.assign(NHS_NUMBER=generated)


def preprocess_patients(df_input):
    """Prepare the patients table; currently this only adds NHS numbers."""
    return append_nhs_numbers(df_input)


def preprocess_encounters(df_input):
    """Reduce the encounters table to one usable encounter per patient.

    Rows are kept only when ``ENCOUNTERCLASS`` is not ``"wellness"`` and
    ``REASONDESCRIPTION`` is present; of the remaining rows, the first one
    for each ``PATIENT`` is retained. The input frame is not modified.
    """
    encounters = df_input.copy()

    not_wellness = encounters["ENCOUNTERCLASS"] != "wellness"
    has_reason = encounters["REASONDESCRIPTION"].notna()
    relevant = encounters[not_wellness & has_reason]

    return relevant.drop_duplicates(subset="PATIENT")


def csvs_to_df():
    """Build the combined patient/encounter table from the Synthea CSV files.

    Reads the configured columns of both CSVs, preprocesses each table, joins
    the encounters onto the patients by patient id, then selects, renames and
    orders the columns according to ``cols``.

    Returns:
        A DataFrame whose columns are the values of ``cols``, in that order.
        ``DataFrame.join`` defaults to a left join, so patients without a
        retained encounter stay in the table with missing encounter values.
    """
    raw_patients = pd.read_csv(path_patients)[cols_patients]
    raw_encounters = pd.read_csv(path_encounters)[cols_encounters]

    patients = preprocess_patients(raw_patients)
    encounters = preprocess_encounters(raw_encounters)

    # Encounters are keyed by PATIENT, which matches the patients' Id column.
    joined = patients.join(encounters.set_index("PATIENT"), on="Id")
    renamed = joined[cols.keys()].rename(columns=cols)

    return renamed[cols.values()]


def df_to_json(df_input):
    """Serialise a DataFrame as a JSON array with one object per row.

    Each row is taken by position and converted to a dict keyed by column
    name; the list of dicts is then encoded with ``json.dumps``.

    Returns:
        The JSON text (``"[]"`` for a frame with no rows).
    """
    # Positional access keeps the output independent of the frame's index.
    records = [df_input.iloc[pos].to_dict() for pos in range(len(df_input))]

    return json.dumps(records)


def save(path, data):
    """Write text to ``path``, creating missing parent directories first.

    Args:
        path: Destination file path. It may be a bare file name, in which
            case the file is written to the current working directory.
        data: Text to write; any existing file content is replaced.
    """
    parent = os.path.dirname(path)

    # dirname is "" for a bare file name, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one to create.
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(path, "w") as f:
        f.write(data)


def load_synthea_output(path):
    """Load saved records and wrap each one as a single batch input.

    Args:
        path: Path to a JSON file whose top-level value is an array of
            records.

    Returns:
        A list with one ``{"data": <record re-encoded as JSON text>}`` dict
        per record, in file order.
    """
    with open(path) as file:
        records = json.load(file)

    # NOTE(review): the "data" key presumably feeds a "{data}" placeholder in
    # the caller's prompt template — confirm against the template in use.
    return [{"data": json.dumps(record)} for record in records]

0 comments on commit 236c7f4

Please sign in to comment.