Livia_Zaharia committed on
Commit
82579ab
·
1 Parent(s): de5608c

read from unprocessed Dexcom

Browse files
Files changed (7) hide show
  1. .gitignore +10 -1
  2. app.py +30 -12
  3. data_formatter/base.py +3 -1
  4. format_dexcom.py +15 -14
  5. main.py +0 -8
  6. tools.py +20 -22
  7. utils/darts_processing.py +4 -1
.gitignore CHANGED
@@ -1 +1,10 @@
1
- __pycache__
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ /__pycache__
3
+ files/
4
+ .micromamba/
5
+ .mamba/
6
+ .mamba-cache/
7
+ .mamba-env/
8
+ .mamba-env-cache/
9
+ .DS_Store
10
+ .vscode/
app.py CHANGED
@@ -1,9 +1,29 @@
1
  import gradio as gr
2
  from tools import *
 
 
 
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  with gr.Blocks() as demo:
6
- file_input = gr.File(label="Upload CSV File")
 
 
 
7
  with gr.Row():
8
  index_slider = gr.Slider(
9
  minimum=0,
@@ -18,27 +38,25 @@ with gr.Blocks() as demo:
18
 
19
  # Update slider and show total samples when file is uploaded
20
  file_input.change(
21
- fn=prep_predict_glucose_tool,
22
  inputs=[file_input],
23
  outputs=[index_slider, sample_count],
24
- queue=False
25
  )
26
- # Set visibility separately
 
27
  file_input.change(
28
  fn=lambda: (gr.Slider(visible=True), gr.Markdown(visible=True)),
29
- outputs=[index_slider, sample_count]
 
30
  )
31
 
32
- # Update plot when slider changes or file uploads
33
- file_input.change(
34
- fn=predict_glucose_tool,
35
- inputs=[index_slider],
36
- outputs=plot_output
37
- )
38
  index_slider.change(
39
  fn=predict_glucose_tool,
40
  inputs=[index_slider],
41
- outputs=plot_output
 
42
  )
43
 
44
  demo.launch()
 
1
  import gradio as gr
2
  from tools import *
3
+ from format_dexcom import process_csv
4
+ import tempfile
5
+ import os
6
 
7
+ def process_and_prepare(file):
8
+ """Process the raw CSV and prepare it for prediction"""
9
+ # Create a temporary file for the processed CSV
10
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as tmp_file:
11
+ processed_path = tmp_file.name
12
+
13
+ # Process the CSV file
14
+ process_csv(
15
+ input_dir=file.name,
16
+ output_file=processed_path
17
+ )
18
+
19
+ # Run the preparation step with processed file
20
+ return prep_predict_glucose_tool(processed_path)
21
 
22
  with gr.Blocks() as demo:
23
+ gr.Markdown("# Glucose Prediction Tool")
24
+ gr.Markdown("Upload a Dexcom CSV file to get predictions")
25
+
26
+ file_input = gr.File(label="Upload Raw Dexcom CSV File")
27
  with gr.Row():
28
  index_slider = gr.Slider(
29
  minimum=0,
 
38
 
39
  # Update slider and show total samples when file is uploaded
40
  file_input.change(
41
+ fn=process_and_prepare,
42
  inputs=[file_input],
43
  outputs=[index_slider, sample_count],
44
+ queue=True
45
  )
46
+
47
+ # Update visibility after processing
48
  file_input.change(
49
  fn=lambda: (gr.Slider(visible=True), gr.Markdown(visible=True)),
50
+ outputs=[index_slider, sample_count],
51
+ queue=True
52
  )
53
 
54
+ # Only update plot after processing is complete
 
 
 
 
 
55
  index_slider.change(
56
  fn=predict_glucose_tool,
57
  inputs=[index_slider],
58
+ outputs=plot_output,
59
+ queue=True
60
  )
61
 
62
  demo.launch()
data_formatter/base.py CHANGED
@@ -25,7 +25,9 @@ dict_input_type = {'target': InputTypes.TARGET,
25
  class DataFormatter:
26
  # Defines and formats data.
27
 
28
- def __init__(self, cnf):
 
 
29
  """Initialises formatter."""
30
  # load parameters from the config file
31
  self.params = cnf
 
25
  class DataFormatter:
26
  # Defines and formats data.
27
 
28
+ def __init__(self, cnf
29
+ #, df
30
+ ):
31
  """Initialises formatter."""
32
  # load parameters from the config file
33
  self.params = cnf
format_dexcom.py CHANGED
@@ -1,20 +1,18 @@
1
  import pandas as pd
2
  from pathlib import Path
3
- import typer
4
 
5
 
6
  def process_csv(
7
- input_dir: Path = typer.Argument( help="Directory containing the input CSV files."),
8
- output_file: Path = typer.Argument( help="Path to save the processed CSV file."),
9
- event_type_filter: str = typer.Option('egv', help="Event type to filter by."),
10
- drop_duplicates: bool = typer.Option(True, help="Whether to drop duplicate timestamps."),
11
- time_diff_minutes: int = typer.Option(1, help="Minimum time difference in minutes to keep a row."),
12
- chunk_size: int = typer.Option(1000, help="Chunk size for the 'id' column increment. Set to 0 or None for a single id."),
13
  ) -> pd.DataFrame:
14
 
15
  # Read CSV file into a DataFrame
16
- filename=input_dir
17
- df = pd.read_csv(filename, low_memory=False)
18
 
19
 
20
  # Filter by Event Type and Event Subtype
@@ -39,12 +37,16 @@ def process_csv(
39
  }
40
  df = df.rename(columns=column_rename)
41
 
 
 
 
 
42
 
43
  # Handle id assignment based on chunk_size
44
  if chunk_size is None or chunk_size == 0:
45
  df['id'] = 1 # Assign the same id to all rows
46
  else:
47
- df['id'] = ((df.index // chunk_size) % (df.index.max() // chunk_size + 1)).astype(int)
48
 
49
  # Convert timestamp to datetime
50
  df['time'] = pd.to_datetime(df['time'])
@@ -66,13 +68,12 @@ def process_csv(
66
  # Write the modified dataframe to a new CSV file
67
  df.to_csv(output_file, index=False)
68
 
69
- typer.echo("CSV files have been successfully merged, modified, and saved.")
70
 
71
  return df
72
 
73
 
74
-
75
-
76
  def process_multiple_csv(
77
  input_dir: Path = typer.Argument('./raw_data/livia_unmerged', help="Directory containing the input CSV files."),
78
  output_file: Path = typer.Argument('./raw_data/livia_unmerged/livia_mini.csv', help="Path to save the processed CSV file."),
@@ -147,6 +148,6 @@ def process_multiple_csv(
147
  combined_df.to_csv(output_file, index=False)
148
 
149
  typer.echo("CSV files have been successfully merged, modified, and saved.")
150
-
151
  if __name__ == "__main__":
152
  typer.run(process_csv)
 
1
  import pandas as pd
2
  from pathlib import Path
 
3
 
4
 
5
  def process_csv(
6
+ input_dir: Path,
7
+ output_file: Path,
8
+ event_type_filter: str = 'egv',
9
+ drop_duplicates: bool = True,
10
+ time_diff_minutes: int = 1,
11
+ chunk_size: int = 1000,
12
  ) -> pd.DataFrame:
13
 
14
  # Read CSV file into a DataFrame
15
+ df = pd.read_csv(input_dir, low_memory=False)
 
16
 
17
 
18
  # Filter by Event Type and Event Subtype
 
37
  }
38
  df = df.rename(columns=column_rename)
39
 
40
+ df['id'] = df['id'].astype(int)
41
+ df = df.dropna(subset=['id']) # Drops rows where the index is NaN
42
+
43
+
44
 
45
  # Handle id assignment based on chunk_size
46
  if chunk_size is None or chunk_size == 0:
47
  df['id'] = 1 # Assign the same id to all rows
48
  else:
49
+ df['id'] = (df.index // chunk_size).astype(int)
50
 
51
  # Convert timestamp to datetime
52
  df['time'] = pd.to_datetime(df['time'])
 
68
  # Write the modified dataframe to a new CSV file
69
  df.to_csv(output_file, index=False)
70
 
71
+ #typer.echo("CSV file has been successfully processed.")
72
 
73
  return df
74
 
75
 
76
+ '''
 
77
  def process_multiple_csv(
78
  input_dir: Path = typer.Argument('./raw_data/livia_unmerged', help="Directory containing the input CSV files."),
79
  output_file: Path = typer.Argument('./raw_data/livia_unmerged/livia_mini.csv', help="Path to save the processed CSV file."),
 
148
  combined_df.to_csv(output_file, index=False)
149
 
150
  typer.echo("CSV files have been successfully merged, modified, and saved.")
151
+ '''
152
  if __name__ == "__main__":
153
  typer.run(process_csv)
main.py DELETED
@@ -1,8 +0,0 @@
1
- import gradio as gr
2
- from tools import *
3
-
4
-
5
- def gradio_output():
6
- return (predict_glucose_tool())
7
-
8
- gr.Interface(fn=gradio_output).launch()
 
 
 
 
 
 
 
 
 
tools.py CHANGED
@@ -14,12 +14,12 @@ from urllib.parse import urlparse
14
  from huggingface_hub import hf_hub_download
15
  import plotly.graph_objects as go
16
  import gradio as gr
 
17
 
18
 
19
  glucose = Path(os.path.abspath(__file__)).parent.resolve()
20
  file_directory = glucose / "files"
21
-
22
-
23
  def plot_forecast(forecasts: np.ndarray, filename: str,ind:int=10):
24
 
25
  forecasts = (forecasts - scalers['target'].min_) / scalers['target'].scale_
@@ -129,27 +129,27 @@ def generate_filename_from_url(url: str, extension: str = "png") -> str:
129
  return filename
130
 
131
 
 
 
 
 
 
132
  def prep_predict_glucose_tool(file):
133
  """
134
- Function to predict future glucose of user. It receives URL with users csv. It will run an ML and will return URL with predictions that user can open on her own..
135
- :param file: it is the csv file imported as a string path to the temporary location gradio allows
136
- :param model: model that is used to predict the glucose- was hardcoded
137
- :param explain if it should give both url and explanation
138
- :param if the person is diabetic when doing prediction and explanation
139
- :return:
140
  """
141
-
142
 
143
- model="Livia-Zaharia/gluformer_models"
144
- model_path = hf_hub_download(repo_id= model, filename="gluformer_1samples_10000epochs_10heads_32batch_geluactivation_livia_mini_weights.pth")
145
 
146
- global formatter
147
- global series
148
- global scalers
149
- formatter, series, scalers = load_data(url=str(file), config_path=file_directory / "config.yaml", use_covs=True,
150
- cov_type='dual',
151
- use_static_covs=True)
152
-
153
 
154
  formatter.params['gluformer'] = {
155
  'in_len': 96, # example input length, adjust as necessary
@@ -182,10 +182,9 @@ def prep_predict_glucose_tool(file):
182
  )
183
 
184
  device = "cuda" if torch.cuda.is_available() else "cpu"
185
- glufo.load_state_dict(torch.load(str(model_path), map_location=torch.device(device), weights_only=True))
186
 
187
  global dataset_test_glufo
188
- # Define dataset for inference
189
  dataset_test_glufo = SamplingDatasetInferenceDual(
190
  target_series=series['test']['target'],
191
  covariates=series['test']['future'],
@@ -213,8 +212,7 @@ def prep_predict_glucose_tool(file):
213
 
214
 
215
  def predict_glucose_tool(ind) -> go.Figure:
216
-
217
-
218
 
219
  device = "cuda" if torch.cuda.is_available() else "cpu"
220
 
 
14
  from huggingface_hub import hf_hub_download
15
  import plotly.graph_objects as go
16
  import gradio as gr
17
+ from format_dexcom import *
18
 
19
 
20
  glucose = Path(os.path.abspath(__file__)).parent.resolve()
21
  file_directory = glucose / "files"
22
+
 
23
  def plot_forecast(forecasts: np.ndarray, filename: str,ind:int=10):
24
 
25
  forecasts = (forecasts - scalers['target'].min_) / scalers['target'].scale_
 
129
  return filename
130
 
131
 
132
+ glufo = None
133
+ scalers = None
134
+ dataset_test_glufo = None
135
+ filename = None
136
+
137
  def prep_predict_glucose_tool(file):
138
  """
139
+ Function to predict future glucose of user.
 
 
 
 
 
140
  """
141
+ global formatter, series, scalers, glufo, dataset_test_glufo, filename
142
 
143
+ model = "Livia-Zaharia/gluformer_models"
144
+ model_path = hf_hub_download(repo_id=model, filename="gluformer_1samples_10000epochs_10heads_32batch_geluactivation_livia_mini_weights.pth")
145
 
146
+ formatter, series, scalers = load_data(
147
+ url=str(file),
148
+ config_path=file_directory / "config.yaml",
149
+ use_covs=True,
150
+ cov_type='dual',
151
+ use_static_covs=True
152
+ )
153
 
154
  formatter.params['gluformer'] = {
155
  'in_len': 96, # example input length, adjust as necessary
 
182
  )
183
 
184
  device = "cuda" if torch.cuda.is_available() else "cpu"
185
+ glufo.load_state_dict(torch.load(str(model_path), map_location=torch.device(device)))
186
 
187
  global dataset_test_glufo
 
188
  dataset_test_glufo = SamplingDatasetInferenceDual(
189
  target_series=series['test']['target'],
190
  covariates=series['test']['future'],
 
212
 
213
 
214
  def predict_glucose_tool(ind) -> go.Figure:
215
+
 
216
 
217
  device = "cuda" if torch.cuda.is_available() else "cpu"
218
 
utils/darts_processing.py CHANGED
@@ -81,6 +81,7 @@ def make_series(data: Dict[str, pd.DataFrame],
81
 
82
  def load_data(url: str,
83
  config_path: Path,
 
84
  use_covs: bool = False,
85
  cov_type: str = 'past',
86
  use_static_covs: bool = False, seed = 0):
@@ -164,7 +165,9 @@ def load_data(url: str,
164
  config = yaml.safe_load(f)
165
  config["data_csv_path"] = url
166
 
167
- formatter = DataFormatter(config)
 
 
168
  assert use_covs is not None, 'use_covs must be specified in the load_data call'
169
 
170
  # convert to series
 
81
 
82
  def load_data(url: str,
83
  config_path: Path,
84
+ #df: pd.DataFrame,
85
  use_covs: bool = False,
86
  cov_type: str = 'past',
87
  use_static_covs: bool = False, seed = 0):
 
165
  config = yaml.safe_load(f)
166
  config["data_csv_path"] = url
167
 
168
+ formatter = DataFormatter(config
169
+ #,df
170
+ )
171
  assert use_covs is not None, 'use_covs must be specified in the load_data call'
172
 
173
  # convert to series