NeuronZero committed on
Commit
02216c1
·
verified ·
1 Parent(s): 7977211

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +65 -0
  2. examples.json +18 -0
  3. process.py +63 -0
  4. requirements.txt +73 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from collections import defaultdict, Counter
3
+
4
+ import matplotlib.pyplot as plt
5
+ import gradio as gr
6
+ import pandas as pd
7
+ from transformers import pipeline
8
+
9
# Select the Agg backend so figures are rendered off-screen (no GUI needed).
plt.switch_backend("Agg")

# Choices for the "Examples" dropdown: one "<label>: <text>" string per entry
# of examples.json (the path is resolved against the current working directory).
examples = []
with open("examples.json", "r") as f:
    content = json.load(f)
    examples = [f"{x['label']}: {x['text']}" for x in content]

# Shared NER pipeline used by run_ner(). With aggregation_strategy="simple"
# each result carries an "entity_group" key, which run_ner() reads.
pipe = pipeline(
    "ner",
    model="Clinical-AI-Apollo/Medical-NER",
    aggregation_strategy="simple",
)
21
+
22
+
23
def plot_to_figure(grouped):
    """Build a bar chart with one bar per key of ``grouped``.

    Args:
        grouped: Mapping of label to numeric value; keys become the x
            positions and values the bar heights.

    Returns:
        A matplotlib ``Figure`` containing the bar chart.
    """
    # Local import: only ``matplotlib.pyplot`` is imported at file level.
    from matplotlib.figure import Figure

    # Use the object-oriented API rather than pyplot. ``plt.figure()``
    # registers every figure in pyplot's global registry until it is closed
    # explicitly, so creating one per request leaks memory in a long-running
    # server, and pyplot's implicit "current figure" is shared state.
    fig = Figure()
    ax = fig.add_subplot()
    ax.bar(x=list(grouped.keys()), height=list(grouped.values()))
    ax.margins(0.2)
    # Leave room below the axes for the vertical tick labels.
    fig.subplots_adjust(bottom=0.4)
    ax.tick_params(axis="x", labelrotation=90)
    return fig
30
+
31
+
32
def run_ner(text):
    """Run the NER pipeline on ``text`` and shape the output for the UI.

    Args:
        text: The note text to analyse.

    Returns:
        A 3-tuple of:
        * a dict with the input ``"text"`` and its ``"entities"`` spans,
        * a list of ``[entity_group, count]`` rows,
        * the figure produced by ``plot_to_figure`` for those counts.
    """
    predictions = pipe(text)

    # One span record per prediction, using the keys of the highlight payload.
    spans = []
    for pred in predictions:
        spans.append(
            {
                "entity": pred["entity_group"],
                "word": pred["word"],
                "score": pred["score"],
                "start": pred["start"],
                "end": pred["end"],
            }
        )
    highlighted = {"text": text, "entities": spans}

    # Number of predictions per entity group, in first-seen order.
    counts = Counter(pred["entity_group"] for pred in predictions)
    table_rows = [[label, total] for label, total in counts.items()]
    chart = plot_to_figure(counts)

    return highlighted, table_rows, chart
51
+
52
+
53
# UI layout: a note textbox with a submit button, an examples dropdown, and
# three outputs (highlighted entities, per-entity counts table, bar chart).
with gr.Blocks() as demo:
    note = gr.Textbox(label="Note text")
    submit = gr.Button("Submit")
    # with gr.Accordion("Examples", open=False):
    example_dropdown = gr.Dropdown(label="Examples", choices=examples)
    # Copy the selected example into the note textbox unchanged.
    # NOTE(review): the choices are "<label>: <text>" strings, so the label
    # prefix is copied into the note as well -- confirm this is intended.
    example_dropdown.change(lambda x: x, inputs=example_dropdown, outputs=note)
    highlight = gr.HighlightedText(label="NER", combine_adjacent=True)
    table = gr.Dataframe(headers=["Entity", "Count"])
    plot = gr.Plot(label="Bar")
    # Clicking the button and submitting the textbox both call run_ner with
    # the note text; its three return values go to the three outputs in order.
    submit.click(run_ner, [note], [highlight, table, plot])
    note.submit(run_ner, [note], [highlight, table, plot])

# Start the app as soon as the module is executed.
demo.launch()
examples.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "text": "This is a 60 year old patient with history of migraines admitted for medical management of diverticulitis responding well to therapy who is now hypertensive with a gradually worsening headache. Hypertensive urgency Headache secondary to increased BP Retinal Artery occlusion less likely as no papilledema migraine does have history of migraines, in absence of PE findings makes this more likely, also has not been receiving home propranolol for prophylaxis",
4
+ "label": 3
5
+ },
6
+ {
7
+ "text": "This is an older man with multiple cardiovascular risk factors and BPH on flomax who is post op day 3 from AAA repair recovering well postoperatively initially, now with decreased urinary output in the setting of foley removal 8 hours ago and after contrast CT this am, otherwise asymptomatic with normal creatinine and BUN, normal electrolytes and benign abdominal exam, presentation concerning for postrenal etiology (obstruction in setting of foley removal, s/p morphine, h/o BPH) vs nephrotoxicity (contrast, analgesics). Follow up with bladder scan and repeat BMP, urinalysis. obstruction started after foley removal ho bph no evidence of renal hypoperfusion with normal vitals, no cardiac symptoms (check EKG to be safe) as above UTI risk factor of foley",
8
+ "label": 2
9
+ },
10
+ {
11
+ "text": "49 y/o male with PMHx of BPH, HTN, DM postop day #3 for AAA repair with possible urinary retention. Fluid sequestration Patient no longer on IV fluids Patient has not been drinking many fluids reports 2 cups of coffee/orange juice this morning BUN of 18 and Cr and of 1.0 are WNL Urinary retention secondary to BPH Decreased Urine production Pt has a history of BPH home medication of Tamsolousin .4mg",
12
+ "label": 0
13
+ },
14
+ {
15
+ "text": "Urinary Retention from Abdominal Surgery Urinary Retention from Abdominal Surgery Decreased urine output 100cc/urine after foley removal today at noon. Distended abdomen Urinary Distention from Enlarged Prostate Decreased urine output 100cc/urine after foley removal today at noon. Increased prostate",
16
+ "label": 1
17
+ }
18
+ ]
process.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import csv
3
+ import json
4
+
5
+ from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
6
+
7
# Model identifier used for both the tokenizer and the model weights below.
MODEL = "d4data/biomedical-ner-all"

# Loaded once at import time and shared by every call to process().
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForTokenClassification.from_pretrained(MODEL)

# NER pipeline; with aggregation_strategy="simple" each result carries an
# "entity_group" key, which process() reads.
pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
13
+
14
+
15
def process(*args):
    """Run NER over every note in a CSV file and write the results as JSON.

    Each CSV row must provide the columns ``text``, ``score``,
    ``student_id`` and ``case``. The output file holds a JSON list with one
    object per row: the row's ``score``, ``student_id`` and ``case`` plus the
    list of entities found in ``text``.

    Args:
        *args: Command-line style arguments, e.g.
            ``"--notes", "notes.csv", "--out", "entities.json"``. When no
            arguments are passed, ``sys.argv[1:]`` is parsed instead.

    Raises:
        ValueError: If ``--notes`` does not end in ``.csv`` or ``--out`` does
            not end in ``.json``.
        KeyError: If a row lacks one of the required columns.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--notes", help="Notes CSV", required=True)
    parser.add_argument("--out", help="Output", required=True)
    # Parse the arguments the caller actually passed. ``None`` makes argparse
    # fall back to sys.argv[1:], which keeps a bare ``process()`` call working.
    options = parser.parse_args(list(args) if args else None)

    filepath = options.notes
    outpath = options.out

    # Validate both paths up front so no model work is done for a bad call.
    if not filepath.endswith(".csv"):
        raise ValueError("Filepath must be a .csv file.")

    if not outpath.endswith(".json"):
        raise ValueError("Output path must be a .json file.")

    processed = []
    # newline="" lets the csv module handle line endings itself, so newlines
    # embedded in quoted note text are preserved and the entity start/end
    # offsets refer to the text exactly as stored in the file.
    with open(filepath, "r", newline="") as f:
        for row in csv.DictReader(f):
            raw = pipe(row["text"])
            processed.append(
                {
                    # The note text itself is deliberately left out of the output.
                    "score": row["score"],
                    "student_id": row["student_id"],
                    "case": row["case"],
                    "entities": [
                        {
                            "entity": x["entity_group"],
                            "word": x["word"],
                            # Convert to a built-in float so json can serialise it.
                            "score": round(float(x["score"]), 2),
                            "start": x["start"],
                            "end": x["end"],
                        }
                        for x in raw
                    ],
                }
            )

    with open(outpath, "w") as f:
        json.dump(processed, f)
58
+
59
+
60
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    import sys

    # Pass the command-line arguments (without the program name) to process().
    process(*sys.argv[1:])
requirements.txt ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ altair==5.2.0
3
+ annotated-types==0.6.0
4
+ anyio==4.3.0
5
+ attrs==23.2.0
6
+ certifi==2024.2.2
7
+ charset-normalizer==3.3.2
8
+ click==8.1.7
9
+ colorama==0.4.6
10
+ contourpy==1.2.0
11
+ cycler==0.12.1
12
+ fastapi==0.110.0
13
+ ffmpy==0.3.2
14
+ filelock==3.13.1
15
+ fonttools==4.49.0
16
+ fsspec==2024.2.0
17
+ gradio==4.20.1
18
+ gradio_client==0.11.0
19
+ h11==0.14.0
20
+ httpcore==1.0.4
21
+ httpx==0.27.0
22
+ huggingface-hub==0.21.4
23
+ idna==3.6
24
+ importlib_resources==6.1.3
25
+ Jinja2==3.1.3
26
+ jsonschema==4.21.1
27
+ jsonschema-specifications==2023.12.1
28
+ kiwisolver==1.4.5
29
+ markdown-it-py==3.0.0
30
+ MarkupSafe==2.1.3
31
+ matplotlib==3.8.3
32
+ mdurl==0.1.2
33
+ mpmath==1.2.1
34
+ networkx==3.2.1
35
+ numpy==1.26.4
36
+ orjson==3.9.15
37
+ packaging==23.2
38
+ pandas==2.2.1
39
+ Pillow==10.1.0
40
+ pydantic==2.6.3
41
+ pydantic_core==2.16.3
42
+ pydub==0.25.1
43
+ Pygments==2.17.2
44
+ pyparsing==3.1.2
45
+ python-dateutil==2.9.0.post0
46
+ python-multipart==0.0.9
47
+ pytz==2024.1
48
+ PyYAML==6.0.1
49
+ referencing==0.33.0
50
+ regex==2023.12.25
51
+ requests==2.31.0
52
+ rich==13.7.1
53
+ rpds-py==0.18.0
54
+ ruff==0.3.1
55
+ safetensors==0.4.2
56
+ semantic-version==2.10.0
57
+ shellingham==1.5.4
58
+ six==1.16.0
59
+ sniffio==1.3.1
60
+ starlette==0.36.3
61
+ sympy==1.12
62
+ tokenizers==0.15.2
63
+ tomlkit==0.12.0
64
+ toolz==0.12.1
65
+ torch==2.2.1
66
+ tqdm==4.66.2
67
+ transformers==4.38.2
68
+ typer==0.9.0
69
+ typing_extensions==4.8.0
70
+ tzdata==2024.1
71
+ urllib3==2.2.1
72
+ uvicorn==0.27.1
73
+ websockets==11.0.3