Spaces:

frugal-ai-challenge
/

submission-portal

Running

App Files Files Community

TheoLvs committed 3 days ago

Commit

961cc08

1 Parent(s): 7c9a1bf

Updated submission

Browse files

Files changed (4) hide show

.gitignore +16 -1
app.py +208 -60
modelcard.md +61 -0
requirements.txt +4 -1

.gitignore CHANGED Viewed

	@@ -1,2 +1,17 @@
1	-
2	.ipynb_checkpoints/sandbox-checkpoint.ipynb

 .ipynb_checkpoints/sandbox-checkpoint.ipynb
+auto_evals/
+venv/
+__pycache__/
+.env
+.ipynb_checkpoints
+*ipynb
+.vscode/
+eval-queue/
+eval-results/
+eval-queue-bk/
+eval-results-bk/
+logs/
+emissions.csv

app.py CHANGED Viewed

@@ -1,76 +1,224 @@
 import gradio as gr
-import spaces
 from codecarbon import EmissionsTracker
-# Import necessary libraries
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import classification_report, accuracy_score
-import pandas as pd
 import numpy as np
-# Let's create a sample dataset (you can replace this with your own data)
-def create_sample_data():
-    np.random.seed(42)
-    n_samples = 10000
-    # Create features (X)
-    X = np.random.randn(n_samples, 4)  # 4 features
-    # Create target (y) - binary classification
-    y = (X[:, 0] + X[:, 1] + X[:, 2] > 0).astype(int)
-    return X, y
-# Get data (replace this with your data loading code)
-X, y = create_sample_data()
-tracker = EmissionsTracker()
-@spaces.GPU
-def submit(username):
-    tracker.start()
-    tracker.start_task("train_model")
-    # Split the data into training and testing sets
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.2, random_state=42
-    )
-    # Initialize the model
-    rf_model = RandomForestClassifier(
-        n_estimators=1000,
-        max_depth=5,
-        random_state=42
-    )
-    # Train the model
-    print("Training the model...")
-    rf_model.fit(X_train, y_train)
-    training_emissions = tracker.stop_task()
-    tracker.start_task("inference")
-    rf_model.predict(X_test)
-    inference_emissions = tracker.stop_task()
-    emissions = inference_emissions.emissions
-    energy = inference_emissions.energy_consumed
-    return [emissions, energy]
-# Update the interface configuration
-demo = gr.Interface(
-    fn=submit,
-    inputs=gr.Textbox(label="Username"),
-    outputs=[
-        gr.Number(label="Emissions (kgCO2eq)", precision=6),
-        gr.Number(label="Energy Consumed (kWh)", precision=6)
-    ],
-    title="Carbon Emissions Tracker",
-    description="Track the carbon emissions and energy consumption of model training and inference."
-)
-# Launch the Gradio interface
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 from codecarbon import EmissionsTracker
+from datasets import load_dataset
 import numpy as np
+from sklearn.metrics import accuracy_score
+import random
+import os
+import json
+from datetime import datetime
+from huggingface_hub import HfApi
+from huggingface_hub import upload_file
+import tempfile
+from dotenv import load_dotenv
+# Load variables from a local .env file (if present) into the environment.
+load_dotenv()
+# The Hugging Face token used for leaderboard uploads is read from the
+# HF_TOKEN_TEXT environment variable. Its value is a secret and must never
+# be printed or logged.
+HF_TOKEN = os.getenv("HF_TOKEN_TEXT")
+if not HF_TOKEN:
+    print("Warning: HF_TOKEN_TEXT not found in environment variables. Submissions will not work.")
+# One module-level tracker, shared by every call to evaluate().
+tracker = EmissionsTracker(allow_multiple_runs=True)
+# Function to get space username and URL
+def get_space_info():
+    """Return the username and URL of the Space this app is running in.
+
+    The Space identifier is read from the ``SPACE_ID`` environment variable
+    (presumably of the form ``"<owner>/<space-name>"`` -- TODO confirm).
+
+    Returns:
+        tuple[str, str]: ``(username, space_url)``. When ``SPACE_ID`` is unset
+        or empty, the placeholders ``("local-user", "local-development")``
+        are returned instead.
+    """
+    space_name = os.getenv("SPACE_ID", "")
+    if not space_name:
+        return "local-user", "local-development"
+    # The part before the first "/" is used as the username.
+    username = space_name.split("/")[0]
+    return username, f"https://huggingface.co/spaces/{space_name}"
+def clean_emissions_data(emissions_data):
+    """Return the emissions record as a dict, minus a few unwanted fields.
+
+    Args:
+        emissions_data: Object whose instance attributes hold the emissions
+            record (evaluate() passes the value returned by
+            ``tracker.stop_task()``).
+
+    Returns:
+        dict: A new dict of every instance attribute except the timestamp,
+        project name, experiment id, latitude and longitude.
+    """
+    excluded = ('timestamp', 'project_name', 'experiment_id', 'latitude', 'longitude')
+    cleaned = {}
+    for name, value in vars(emissions_data).items():
+        if name in excluded:
+            continue
+        cleaned[name] = value
+    return cleaned
+def evaluate():
+    """Score the random baseline on the test split while tracking emissions.
+
+    Returns:
+        list: ``[accuracy, emissions, energy, results_json]`` where emissions
+        and energy are the tracker's values multiplied by 1000 and
+        ``results_json`` is an indented JSON string holding the username,
+        Space URL, submission timestamp, accuracy, both scaled measurements
+        and the cleaned emissions record.
+    """
+    username, space_url = get_space_info()
+    # Everything between start_task and stop_task is attributed to "inference".
+    tracker.start()
+    tracker.start_task("inference")
+    # Random baseline: one label in 0..7 per test example, ignoring the text.
+    true_labels = test_dataset["label"]
+    n_examples = len(true_labels)
+    predictions = [random.randint(0, 7) for _ in range(n_examples)]
+    accuracy = accuracy_score(true_labels, predictions)
+    emissions_data = tracker.stop_task()
+    # Scale by 1000 for reporting in Wh / gCO2eq
+    # (assumes the tracker reports kWh / kgCO2eq -- TODO confirm).
+    energy_wh = emissions_data.energy_consumed * 1000
+    emissions_g = emissions_data.emissions * 1000
+    results = {
+        "username": username,
+        "space_url": space_url,
+        "submission_timestamp": datetime.now().isoformat(),
+        "accuracy": float(accuracy),
+        "energy_consumed_wh": energy_wh,
+        "emissions_gco2eq": emissions_g,
+        "emissions_data": clean_emissions_data(emissions_data),
+    }
+    # Summary values first, then the full record as pretty-printed JSON.
+    return [accuracy, emissions_g, energy_wh, json.dumps(results, indent=2)]
+def submit_results(results_json):
+    """Upload evaluation results to the public leaderboard dataset.
+
+    The results are serialized to a temporary ``.json`` file and uploaded to
+    the ``frugal-ai-challenge/public-leaderboard-text`` dataset repository as
+    ``submissions/<username>_<YYYYmmdd_HHMMSS>.json``.
+
+    Args:
+        results_json: Value of the "Detailed Results" component. It is indexed
+            with ``results_json['username']``, so it must be a mapping here
+            (presumably Gradio has already parsed the JSON text produced by
+            evaluate() -- TODO confirm).
+
+    Returns:
+        The result of ``gr.Warning`` when there is nothing to submit or no
+        token is configured, otherwise the result of ``gr.Info``.
+
+    Raises:
+        KeyError: If ``results_json`` has no ``"username"`` entry.
+        Exception: Any error raised by the upload propagates to the caller.
+    """
+    if not results_json:
+        return gr.Warning("No results to submit")
+    if not HF_TOKEN:
+        return gr.Warning("HF_TOKEN_TEXT not found. Please set up your Hugging Face token.")
+    # Serialize the results and build the destination path first, so that a
+    # malformed payload fails before any temporary file is created.
+    results_str = json.dumps(results_json)
+    path_in_repo = f"submissions/{results_json['username']}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+    # delete=False: the file must outlive the with-block because it is
+    # uploaded by path afterwards.
+    with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f:
+        f.write(results_str)
+        temp_path = f.name
+    try:
+        api = HfApi(token=HF_TOKEN)
+        api.upload_file(
+            path_or_fileobj=temp_path,
+            path_in_repo=path_in_repo,
+            repo_id="frugal-ai-challenge/public-leaderboard-text",
+            repo_type="dataset",
+            token=HF_TOKEN
+        )
+    finally:
+        # Remove the temporary file even when the upload fails.
+        os.unlink(temp_path)
+    return gr.Info("Results submitted successfully to the leaderboard! 🎉")
+# Map each string label of the dataset to its integer class id
+LABEL_MAPPING = {
+    "0_not_relevant": 0,  # No relevant claim detected
+    "1_not_happening": 1,  # Global warming is not happening
+    "2_not_human": 2,  # Not caused by humans
+    "3_not_bad": 3,  # Not bad or beneficial
+    "4_solutions_harmful_unnecessary": 4,  # Solutions harmful/unnecessary
+    "5_science_unreliable": 5,  # Science is unreliable
+    "6_proponents_biased": 6,  # Proponents are biased
+    "7_fossil_fuels_needed": 7  # Fossil fuels are needed
+}
+# Human-readable description of each integer class id, for display purposes
+# (keyed by the integer ids above, not by the original string labels)
+LABEL_DESCRIPTIONS = {
+    0: "No relevant claim detected",
+    1: "Global warming is not happening",
+    2: "Not caused by humans",
+    3: "Not bad or beneficial",
+    4: "Solutions harmful/unnecessary",
+    5: "Science is unreliable",
+    6: "Proponents are biased",
+    7: "Fossil fuels are needed"
+}
+# Fetch the challenge data, then replace string labels with integer ids
+print("Loading dataset...")
+dataset = load_dataset("QuotaClimat/frugalaichallenge-text-train")
+dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
+# Hold out 20% of the train split (fixed seed) as the evaluation set
+train_test = dataset["train"].train_test_split(test_size=0.2, seed=42)
+train_dataset, test_dataset = train_test["train"], train_test["test"]
+# Log a short preview of the evaluation set
+print("\nFirst 5 rows of test set:")
+for number, row in enumerate(test_dataset.select(range(5)), start=1):
+    label_id = row["label"]
+    print(f"\nExample {number}:")
+    print(f"Text: {row['quote'][:100]}...")
+    print(f"Label: {label_id} - {LABEL_DESCRIPTIONS[label_id]}")
+# Build the Gradio interface: page header, three tabs, and the button wiring
+with gr.Blocks() as demo:
+    gr.Markdown("""
+# Frugal AI Challenge - Text task - Submission portal
+## Climate Disinformation Classification
+""")
+    # Tabs: instructions, evaluation/submission, and the model card
+    with gr.Tabs():
+        with gr.Tab("Instructions"):
+            gr.Markdown("""
+To submit your results, please follow the steps below:
+## Prepare your model submission
+1. Clone the space of this portal on your own Hugging Face account.
+2. Modify the ``evaluate`` function to replace the baseline by your model loading and inference within the inference pass where the energy consumption and emissions are tracked.
+3. Eventually complete the requirements and/or any necessaries dependencies in your space.
+4. Write down your model card in the ``modelcard.md`` file.
+5. Deploy your space and verify that it works.
+6. (Optional) You can change the Space hardware to use any GPU directly on Hugging Face.
+## Submit your model to the leaderboard in the ``Model Submission`` tab
+7. Step 1 - Evaluate model: Click on the button to evaluate your model. This will run you model, computes the accuracy on the test set (20% of the train set), and track the energy consumption and emissions.
+8. Step 2 - Submit to leaderboard: Click on the button to submit your results to the leaderboard. This will upload the results to the leaderboard dataset and update the leaderboard.
+9. You can see the leaderboard at https://huggingface.co/datasets/frugal-ai-challenge/public-leaderboard-text
+""")
+        with gr.Tab("Model Submission"):
+            gr.Markdown("## Random Baseline Model")
+            # Two action buttons side by side: evaluate first, then submit
+            with gr.Row():
+                with gr.Column(scale=1):
+                    evaluate_btn = gr.Button("1. Evaluate model", variant="secondary")
+                with gr.Column(scale=1):
+                    submit_btn = gr.Button("2. Submit to leaderboard", variant="primary", size="lg")
+            # Summary numbers filled in by evaluate()
+            with gr.Row():
+                accuracy_output = gr.Number(label="Accuracy", precision=4)
+                emissions_output = gr.Number(label="Emissions (gCO2eq)", precision=12)
+                energy_output = gr.Number(label="Energy Consumed (Wh)", precision=12)
+            with gr.Row():
+                results_json = gr.JSON(label="Detailed Results", visible=True)
+            # evaluate() returns four values, mapped in order onto the four
+            # output components listed here
+            evaluate_btn.click(
+                evaluate,
+                inputs=None,
+                outputs=[accuracy_output, emissions_output, energy_output, results_json]
+            )
+            # The current content of results_json is passed to submit_results()
+            submit_btn.click(
+                submit_results,
+                inputs=[results_json],
+                outputs=None  # No need for output component with popups
+            )
+        with gr.Tab("Model Card"):
+            # modelcard.md is read once, while the interface is being built;
+            # the relative path is resolved against the working directory
+            with open("modelcard.md", "r") as f:
+                model_card_content = f.read()
+            gr.Markdown(model_card_content)
 if __name__ == "__main__":
     demo.launch()

modelcard.md ADDED Viewed

	@@ -0,0 +1,61 @@

+# Random Baseline Model Card
+## Model Description
+**Model Type:** Random Baseline Classifier
+**Task:** Climate Change Disinformation Classification
+**Version:** 1.0.0
+**Last Updated:** 2024
+### Overview
+This is a random baseline model for climate change disinformation classification. It randomly assigns labels to text inputs, serving as a baseline for comparing more sophisticated models.
+### Intended Use
+- **Primary Use:** Baseline comparison for climate disinformation classification models
+- **Intended Users:** Researchers and developers working on climate disinformation detection
+- **Out-of-Scope Uses:** Not intended for production or real-world classification tasks
+## Training Data
+**Dataset:** QuotaClimat/frugalaichallenge-text-train
+- Size: ~6000 examples
+- Split: 80% train, 20% test
+- Labels: 8 categories of climate disinformation claims
+### Labels
+0. No relevant claim detected
+1. Global warming is not happening
+2. Not caused by humans
+3. Not bad or beneficial
+4. Solutions harmful/unnecessary
+5. Science is unreliable
+6. Proponents are biased
+7. Fossil fuels are needed
+## Performance
+### Metrics
+- **Accuracy:** ~12.5% (random chance)
+- **Environmental Impact:**
+  - Emissions (gCO2eq)
+  - Energy Consumed (Wh)
+### Limitations
+- Random predictions with no learning
+- No consideration of input text
+- Serves only as a baseline reference
+## Ethical Considerations
+- Model makes random predictions and should not be used for actual classification
+- Dataset contains sensitive topics related to climate disinformation
+- Environmental impact is tracked to promote awareness of AI's carbon footprint
+## Environmental Impact
+This model tracks its environmental impact using CodeCarbon, measuring:
+- Carbon emissions
+- Energy consumption
+## Caveats and Recommendations
+- Use only as a baseline comparison
+- Not suitable for production use
+- Consider environmental impact when running experiments

requirements.txt CHANGED Viewed

@@ -1,2 +1,5 @@
 codecarbon==2.8.1
-scikit-learn==1.5.1

 codecarbon==2.8.1
+scikit-learn==1.5.1
+datasets==3.2.0
+huggingface-hub==0.26.3
+python-dotenv==1.0.1