Spaces:

antonkulaga
/

glucosedao_gpu

Sleeping

App Files Files Community

Livia_Zaharia committed on Nov 16, 2024

Commit

82579ab

1 Parent(s): de5608c

read from unprocessed Dexcom

Browse files

Files changed (7) hide show

.gitignore +10 -1
app.py +30 -12
data_formatter/base.py +3 -1
format_dexcom.py +15 -14
main.py +0 -8
tools.py +20 -22
utils/darts_processing.py +4 -1

.gitignore CHANGED Viewed

	@@ -1 +1,10 @@
1	- __pycache__

+__pycache__/
+/__pycache__
+files/
+.micromamba/
+.mamba/
+.mamba-cache/
+.mamba-env/
+.mamba-env-cache/
+.DS_Store
+.vscode/

app.py CHANGED Viewed

@@ -1,9 +1,29 @@
 import gradio as gr
 from tools import *
 with gr.Blocks() as demo:
-    file_input = gr.File(label="Upload CSV File")
     with gr.Row():
         index_slider = gr.Slider(
             minimum=0,
@@ -18,27 +38,25 @@ with gr.Blocks() as demo:
     # Update slider and show total samples when file is uploaded
     file_input.change(
-        fn=prep_predict_glucose_tool,
         inputs=[file_input],
         outputs=[index_slider, sample_count],
-        queue=False
     )
-        # Set visibility separately
     file_input.change(
         fn=lambda: (gr.Slider(visible=True), gr.Markdown(visible=True)),
-        outputs=[index_slider, sample_count]
     )
-    # Update plot when slider changes or file uploads
-    file_input.change(
-        fn=predict_glucose_tool,
-        inputs=[index_slider],
-        outputs=plot_output
-    )
     index_slider.change(
         fn=predict_glucose_tool,
         inputs=[index_slider],
-        outputs=plot_output
     )
 demo.launch()

 import gradio as gr
 from tools import *
+from format_dexcom import process_csv
+import tempfile
+import os
+def process_and_prepare(file):
+    """Process the raw CSV and prepare it for prediction"""
+    # Create a temporary file for the processed CSV
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as tmp_file:
+        processed_path = tmp_file.name
+    # Process the CSV file
+    process_csv(
+        input_dir=file.name,
+        output_file=processed_path
+    )
+    # Run the preparation step with processed file
+    return prep_predict_glucose_tool(processed_path)
 with gr.Blocks() as demo:
+    gr.Markdown("# Glucose Prediction Tool")
+    gr.Markdown("Upload a Dexcom CSV file to get predictions")
+    file_input = gr.File(label="Upload Raw Dexcom CSV File")
     with gr.Row():
         index_slider = gr.Slider(
             minimum=0,
     # Update slider and show total samples when file is uploaded
     file_input.change(
+        fn=process_and_prepare,
         inputs=[file_input],
         outputs=[index_slider, sample_count],
+        queue=True
     )
+    # Update visibility after processing
     file_input.change(
         fn=lambda: (gr.Slider(visible=True), gr.Markdown(visible=True)),
+        outputs=[index_slider, sample_count],
+        queue=True
     )
+    # Only update plot after processing is complete
     index_slider.change(
         fn=predict_glucose_tool,
         inputs=[index_slider],
+        outputs=plot_output,
+        queue=True
     )
 demo.launch()

data_formatter/base.py CHANGED Viewed

@@ -25,7 +25,9 @@ dict_input_type = {'target': InputTypes.TARGET,
 class DataFormatter:
   # Defines and formats data.
-  def __init__(self, cnf):
     """Initialises formatter."""
     # load parameters from the config file
     self.params = cnf

 class DataFormatter:
   # Defines and formats data.
+  def __init__(self, cnf
+               #, df
+               ):
     """Initialises formatter."""
     # load parameters from the config file
     self.params = cnf

format_dexcom.py CHANGED Viewed

@@ -1,20 +1,18 @@
 import pandas as pd
 from pathlib import Path
-import typer
 def process_csv(
-        input_dir: Path = typer.Argument( help="Directory containing the input CSV files."),
-        output_file: Path = typer.Argument( help="Path to save the processed CSV file."),
-        event_type_filter: str = typer.Option('egv', help="Event type to filter by."),
-        drop_duplicates: bool = typer.Option(True, help="Whether to drop duplicate timestamps."),
-        time_diff_minutes: int = typer.Option(1, help="Minimum time difference in minutes to keep a row."),
-        chunk_size: int = typer.Option(1000, help="Chunk size for the 'id' column increment. Set to 0 or None for a single id."),
 ) -> pd.DataFrame:
     # Read CSV file into a DataFrame
-    filename=input_dir
-    df = pd.read_csv(filename, low_memory=False)
     # Filter by Event Type and Event Subtype
@@ -39,12 +37,16 @@ def process_csv(
     }
     df = df.rename(columns=column_rename)
     # Handle id assignment based on chunk_size
     if chunk_size is None or chunk_size == 0:
         df['id'] = 1  # Assign the same id to all rows
     else:
-        df['id'] = ((df.index // chunk_size) % (df.index.max() // chunk_size + 1)).astype(int)
     # Convert timestamp to datetime
     df['time'] = pd.to_datetime(df['time'])
@@ -66,13 +68,12 @@ def process_csv(
     # Write the modified dataframe to a new CSV file
     df.to_csv(output_file, index=False)
-    typer.echo("CSV files have been successfully merged, modified, and saved.")
     return df
 def process_multiple_csv(
         input_dir: Path = typer.Argument('./raw_data/livia_unmerged', help="Directory containing the input CSV files."),
         output_file: Path = typer.Argument('./raw_data/livia_unmerged/livia_mini.csv', help="Path to save the processed CSV file."),
@@ -147,6 +148,6 @@ def process_multiple_csv(
     combined_df.to_csv(output_file, index=False)
     typer.echo("CSV files have been successfully merged, modified, and saved.")
 if __name__ == "__main__":
     typer.run(process_csv)

 import pandas as pd
 from pathlib import Path
 def process_csv(
+        input_dir: Path,
+        output_file: Path,
+        event_type_filter: str = 'egv',
+        drop_duplicates: bool = True,
+        time_diff_minutes: int = 1,
+        chunk_size: int = 1000,
 ) -> pd.DataFrame:
     # Read CSV file into a DataFrame
+    df = pd.read_csv(input_dir, low_memory=False)
     # Filter by Event Type and Event Subtype
     }
     df = df.rename(columns=column_rename)
+    df['id'] = df['id'].astype(int)
+    df = df.dropna(subset=['id'])  # Drops rows where the 'id' column is NaN
     # Handle id assignment based on chunk_size
     if chunk_size is None or chunk_size == 0:
         df['id'] = 1  # Assign the same id to all rows
     else:
+        df['id'] = (df.index // chunk_size).astype(int)
     # Convert timestamp to datetime
     df['time'] = pd.to_datetime(df['time'])
     # Write the modified dataframe to a new CSV file
     df.to_csv(output_file, index=False)
+    #typer.echo("CSV file has been successfully processed.")
     return df
+'''
 def process_multiple_csv(
         input_dir: Path = typer.Argument('./raw_data/livia_unmerged', help="Directory containing the input CSV files."),
         output_file: Path = typer.Argument('./raw_data/livia_unmerged/livia_mini.csv', help="Path to save the processed CSV file."),
     combined_df.to_csv(output_file, index=False)
     typer.echo("CSV files have been successfully merged, modified, and saved.")
+'''
 if __name__ == "__main__":
     typer.run(process_csv)

main.py DELETED Viewed

@@ -1,8 +0,0 @@
-import gradio as gr
-from tools import *
-def gradio_output():
-    return (predict_glucose_tool())
-gr.Interface(fn=gradio_output).launch()

tools.py CHANGED Viewed

@@ -14,12 +14,12 @@ from urllib.parse import urlparse
 from huggingface_hub import hf_hub_download
 import plotly.graph_objects as go
 import gradio as gr
 glucose = Path(os.path.abspath(__file__)).parent.resolve()
 file_directory = glucose / "files"
 def plot_forecast(forecasts: np.ndarray, filename: str,ind:int=10):
     forecasts = (forecasts - scalers['target'].min_) / scalers['target'].scale_
@@ -129,27 +129,27 @@ def generate_filename_from_url(url: str, extension: str = "png") -> str:
     return filename
 def prep_predict_glucose_tool(file):
     """
-    Function to predict future glucose of user. It receives URL with users csv. It will run an ML and will return URL with predictions that user can open on her own..
-    :param file: it is the csv file imported as a string path to the temporary location gradio allows
-    :param model: model that is used to predict the glucose- was hardcoded
-    :param explain if it should give both url and explanation
-    :param if the person is diabetic when doing prediction and explanation
-    :return:
     """
-    model="Livia-Zaharia/gluformer_models"
-    model_path = hf_hub_download(repo_id= model, filename="gluformer_1samples_10000epochs_10heads_32batch_geluactivation_livia_mini_weights.pth")
-    global formatter
-    global series
-    global scalers
-    formatter, series, scalers = load_data(url=str(file), config_path=file_directory / "config.yaml", use_covs=True,
-                                           cov_type='dual',
-                                           use_static_covs=True)
     formatter.params['gluformer'] = {
         'in_len': 96,  # example input length, adjust as necessary
@@ -182,10 +182,9 @@ def prep_predict_glucose_tool(file):
     )
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    glufo.load_state_dict(torch.load(str(model_path), map_location=torch.device(device), weights_only=True))
     global dataset_test_glufo
-    # Define dataset for inference
     dataset_test_glufo = SamplingDatasetInferenceDual(
         target_series=series['test']['target'],
         covariates=series['test']['future'],
@@ -213,8 +212,7 @@ def prep_predict_glucose_tool(file):
 def predict_glucose_tool(ind) -> go.Figure:
     device = "cuda" if torch.cuda.is_available() else "cpu"

 from huggingface_hub import hf_hub_download
 import plotly.graph_objects as go
 import gradio as gr
+from format_dexcom import *
 glucose = Path(os.path.abspath(__file__)).parent.resolve()
 file_directory = glucose / "files"
 def plot_forecast(forecasts: np.ndarray, filename: str,ind:int=10):
     forecasts = (forecasts - scalers['target'].min_) / scalers['target'].scale_
     return filename
+glufo = None
+scalers = None
+dataset_test_glufo = None
+filename = None
 def prep_predict_glucose_tool(file):
     """
+    Function to predict future glucose of user.
     """
+    global formatter, series, scalers, glufo, dataset_test_glufo, filename
+    model = "Livia-Zaharia/gluformer_models"
+    model_path = hf_hub_download(repo_id=model, filename="gluformer_1samples_10000epochs_10heads_32batch_geluactivation_livia_mini_weights.pth")
+    formatter, series, scalers = load_data(
+        url=str(file),
+        config_path=file_directory / "config.yaml",
+        use_covs=True,
+        cov_type='dual',
+        use_static_covs=True
+    )
     formatter.params['gluformer'] = {
         'in_len': 96,  # example input length, adjust as necessary
     )
     device = "cuda" if torch.cuda.is_available() else "cpu"
+    glufo.load_state_dict(torch.load(str(model_path), map_location=torch.device(device)))
     global dataset_test_glufo
     dataset_test_glufo = SamplingDatasetInferenceDual(
         target_series=series['test']['target'],
         covariates=series['test']['future'],
 def predict_glucose_tool(ind) -> go.Figure:
     device = "cuda" if torch.cuda.is_available() else "cpu"

utils/darts_processing.py CHANGED Viewed

@@ -81,6 +81,7 @@ def make_series(data: Dict[str, pd.DataFrame],
 def load_data(url: str,
               config_path: Path,
               use_covs: bool = False,
               cov_type: str = 'past',
               use_static_covs: bool = False, seed = 0):
@@ -164,7 +165,9 @@ def load_data(url: str,
         config = yaml.safe_load(f)
     config["data_csv_path"] = url
-    formatter = DataFormatter(config)
     assert use_covs is not None, 'use_covs must be specified in the load_data call'
     # convert to series

 def load_data(url: str,
               config_path: Path,
+              #df: pd.DataFrame,
               use_covs: bool = False,
               cov_type: str = 'past',
               use_static_covs: bool = False, seed = 0):
         config = yaml.safe_load(f)
     config["data_csv_path"] = url
+    formatter = DataFormatter(config
+                              #,df
+                              )
     assert use_covs is not None, 'use_covs must be specified in the load_data call'
     # convert to series