Spaces:

Gangsterbra123
/

ICS5110

Sleeping

App Files Files Community

ICS5110 / app.py

Gangsterbra123

Upload 17 files

233eb38 verified about 1 month ago

raw

history blame contribute delete

15.1 kB

	import gradio as gr
	import pickle
	import pandas as pd
	import ast
	import numpy as np
	import os
	import matplotlib.pyplot as plt

	# Opt in to pandas' future behaviour so that operations such as fillna()
	# no longer silently downcast object-dtype results.
	pd.set_option('future.no_silent_downcasting', True)

	# Choices for the model dropdown: (label shown in the UI, value passed to
	# the prediction callbacks).
	model_options = [
	    ("SVM - Jerome Agius", 0),
	    ("Logistic Regression - Isaac Muscat", 1),
	    ("Random Forest - Kyle Demicoli", 2),
	]

	# Dropdown choices. A tuple is (label shown in the UI, raw dataset value);
	# a plain string is used as both label and value.
	workclass_options = [
	    ('State Government', 'State-gov'),
	    ('Self Employed Not Incorporated', 'Self-emp-not-inc'),
	    'Private',
	    ('Federal Government', 'Federal-gov'),
	    ('Local Government', 'Local-gov'),
	    ('Self Employed Incorporated', 'Self-emp-inc'),
	    ('Without Pay', 'Without-pay'),
	]

	education_option = [
	    ('Pre-School', 'Preschool'),
	    '1st-4th',
	    '5th-6th',
	    '7th-8th',
	    '9th',
	    '10th',
	    '11th',
	    '12th',
	    ('High School Graduate', 'HS-grad'),
	    ('Some College', 'Some-college'),
	    ('Associate Degree - Vocational', 'Assoc-voc'),
	    ('Associate Degree - Academic', 'Assoc-acdm'),
	    'Bachelors',
	    'Masters',
	    ('Professional School', 'Prof-school'),
	    'Doctorate',
	]

	marital_status_option = [
	    ('Never Married', 'Never-married'),
	    ('Married Civilian Spouse', 'Married-civ-spouse'),
	    'Divorced',
	    'Separated',
	    ('Married Armed Forces Spouse', 'Married-AF-spouse'),
	    'Widowed',
	    ('Married Spouse Absent', 'Married-spouse-absent'),
	]
	occupation_option = [
	    ('Administrative Clerical', 'Adm-clerical'),
	    ('Executive Managerial', 'Exec-managerial'),
	    ('Handlers and Cleaners', 'Handlers-cleaners'),
	    ('Professional Specialty', 'Prof-specialty'),
	    'Sales',
	    ('Farming and Fishing', 'Farming-fishing'),
	    ('Machine Operator and Inspector', 'Machine-op-inspct'),
	    ('Other Service', 'Other-service'),
	    ('Transport and Moving', 'Transport-moving'),
	    ('Technical Support', 'Tech-support'),
	    ('Craft and Repair', 'Craft-repair'),
	    ('Protective Services', 'Protective-serv'),
	    ('Armed Forces', 'Armed-Forces'),
	    ('Private Household Services', 'Priv-house-serv'),
	]
	relationship_option = [
	    ('Not In Family', 'Not-in-family'),
	    'Husband',
	    'Wife',
	    ('Biological Child', 'Own-child'),
	    'Unmarried',
	    ('Other Relative', 'Other-relative'),
	]
	race_option = [
	    'White',
	    'Black',
	    'Other',
	    ('Asian', 'Asian-Pac-Islander'),
	    ('Indian', 'Amer-Indian-Eskimo'),
	]
	sex_option = sorted(['Male', 'Female'])

	# [minimum, maximum] bounds for the salary sliders
	age = [0, 100]
	capital_gain = [0, 99999]
	capital_loss = [0, 4356]
	hours_per_week = [20, 60]

	# [minimum, maximum] bounds and choices for the health-charges inputs
	children_count = [0, 15]
	bmi = [10, 100]
	region_option = ['southwest', 'southeast', 'northwest', 'northeast']
	smoker_option = ['yes', 'no']

	# Mapping from education level to its ordinal 'education-num' value
	education_dict = {
	    'Preschool': 1,
	    '1st-4th': 2,
	    '5th-6th': 3,
	    '7th-8th': 4,
	    '9th': 5,
	    '10th': 6,
	    '11th': 7,
	    '12th': 8,
	    'HS-grad': 9,
	    'Some-college': 10,
	    'Assoc-voc': 11,
	    'Assoc-acdm': 12,
	    'Bachelors': 13,
	    'Masters': 14,
	    'Prof-school': 15,
	    'Doctorate': 16,
	}
	# String form of the mapping, kept so the existing module-level name still
	# resolves to the same text as before.
	education_mapping = repr(education_dict)

	# List of the columns present in dataframe used to train the model
	salary_columns = [
	    'age', 'education-num', 'sex', 'capital-gain', 'capital-loss',
	    'hours-per-week', 'workclass_Local-gov', 'workclass_Private',
	    'workclass_Self-emp-inc', 'workclass_Self-emp-not-inc',
	    'workclass_State-gov', 'workclass_Without-pay',
	    'marital-status_Married-AF-spouse', 'marital-status_Married-civ-spouse',
	    'marital-status_Married-spouse-absent', 'marital-status_Never-married',
	    'marital-status_Separated', 'marital-status_Widowed',
	    'occupation_Armed-Forces', 'occupation_Craft-repair',
	    'occupation_Exec-managerial', 'occupation_Farming-fishing',
	    'occupation_Handlers-cleaners', 'occupation_Machine-op-inspct',
	    'occupation_Other-service', 'occupation_Priv-house-serv',
	    'occupation_Prof-specialty', 'occupation_Protective-serv',
	    'occupation_Sales', 'occupation_Tech-support',
	    'occupation_Transport-moving', 'relationship_Not-in-family',
	    'relationship_Other-relative', 'relationship_Own-child',
	    'relationship_Unmarried', 'relationship_Wife', 'race_Asian-Pac-Islander',
	    'race_Black', 'race_Other', 'race_White',
	]

	health_columns = [
	    'age', 'sex', 'bmi', 'children', 'smoker',
	    'region_northwest', 'region_southeast', 'region_southwest',
	]

	# Salary prediction callback (supports the SVM, Logistic Regression and Random Forest models)
	def Salary(model, workclass, education, marital_status, occupation, relationship, race, sex, age, capital_gain, capital_loss, hours_per_week):
	    """Predict the salary class for one person and chart the class probabilities.

	    Args:
	        model: Model selector: 0 = SVM, 1 = Logistic Regression,
	            2 = Random Forest.
	        workclass, marital_status, occupation, relationship, race: Raw
	            category values; each is one-hot encoded into the column
	            ``<feature>_<value>`` when that column exists in
	            ``salary_columns``.
	        education: Education level, converted to its ordinal through
	            ``education_dict`` (unknown levels become 0).
	        sex: ``'Male'`` is encoded as 1, any other value as 0.
	        age, capital_gain, capital_loss, hours_per_week: Numeric inputs.

	    Returns:
	        A ``(message, figure)`` tuple: the text naming the model used and the
	        predicted salary class, and a matplotlib pie chart of the predicted
	        class probabilities.

	    Raises:
	        ValueError: If ``model`` is not 0, 1 or 2, or if one of the
	            categorical inputs is ``None``.
	    """
	    # (display name, model pickle, scaler pickle) for every selectable model
	    model_files = {
	        0: ("SVM",
	            'best_svm_OvM_Salary_Classification.pkl',
	            'z-score_scaler_svm_salary_classification.pkl'),
	        1: ("Logistic Regression",
	            'best_lr_Salary_Classification.pkl',
	            'z-score_scaler_lr_salary_classification.pkl'),
	        2: ("Random Forest",
	            'best_rf_Salary_Classification.pkl',
	            'z-score_scaler_rf_salary_classification.pkl'),
	    }
	    if model not in model_files:
	        raise ValueError(f"Unknown model selection: {model!r}")

	    categorical_inputs = {
	        'workclass': workclass,
	        'education': education,
	        'marital-status': marital_status,
	        'occupation': occupation,
	        'relationship': relationship,
	        'race': race,
	        'sex': sex,
	    }
	    missing = [name for name, value in categorical_inputs.items() if value is None]
	    if missing:
	        raise ValueError("Please select a value for: " + ", ".join(missing))

	    model_used, model_file, scaler_file = model_files[model]

	    # Resolve the artefacts relative to this script instead of changing the
	    # process-wide working directory on every request.
	    model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models')

	    # NOTE: pickle.load executes arbitrary code; only load the artefacts
	    # shipped with the application.
	    with open(os.path.join(model_dir, model_file), 'rb') as f:
	        loaded_model = pickle.load(f)
	    with open(os.path.join(model_dir, scaler_file), 'rb') as f:
	        scaler = pickle.load(f)

	    # Start with every training column at 0, then fill in the inputs.
	    row = dict.fromkeys(salary_columns, 0)
	    row['age'] = age
	    row['education-num'] = education_dict.get(education, 0)
	    # Encode from the submitted value (the empty frame column was read before,
	    # which made this feature always 0).
	    row['sex'] = 1 if sex == 'Male' else 0
	    row['capital-gain'] = capital_gain
	    row['capital-loss'] = capital_loss
	    row['hours-per-week'] = hours_per_week

	    one_hot_inputs = (
	        ('workclass', workclass),
	        ('marital-status', marital_status),
	        ('occupation', occupation),
	        ('relationship', relationship),
	        ('race', race),
	    )
	    for prefix, value in one_hot_inputs:
	        column = f"{prefix}_{value}"
	        # Categories without a column in salary_columns stay all-zero.
	        if column in row:
	            row[column] = 1

	    formattedDF = pd.DataFrame([row], columns=salary_columns).astype(int)

	    # Log-transform the capital columns before scaling.
	    for column in ['capital-gain', 'capital-loss']:
	        formattedDF[column] = np.log1p(formattedDF[column])

	    # Apply the scaler to the unseen data
	    continuous_columns = ['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
	    formattedDF[continuous_columns] = scaler.transform(formattedDF[continuous_columns])

	    # Make predictions with the loaded model
	    prediction = loaded_model.predict(formattedDF)
	    # Probabilities of the single submitted sample
	    probabilities = loaded_model.predict_proba(formattedDF)[0]
	    num_classes = len(probabilities)

	    class_dict = {
	        0: '<=50K',
	        1: '>50K',
	    }
	    class_labels = [class_dict[i] for i in range(num_classes)]
	    colors = plt.cm.viridis(np.linspace(0, 1, num_classes))  # one colour per class

	    fig, ax = plt.subplots(figsize=(10, 10))
	    _, _, autotexts = ax.pie(probabilities, colors=colors, autopct='%1.1f%%', startangle=140, pctdistance=1.1)

	    # Legend with one coloured box per class
	    legend_elements = [
	        plt.Rectangle((0, 0), 1, 1, color=color, label=label)
	        for color, label in zip(colors, class_labels)
	    ]
	    ax.legend(handles=legend_elements, loc='upper left')
	    ax.set_title("Predicted Class Probabilities")

	    # Show two-decimal percentages and hide the label of empty wedges.
	    for autotext, p in zip(autotexts, probabilities):
	        prob = float(round(p * 100, 2))
	        autotext.set_text(f"{prob}%" if prob > 0 else '')

	    # Unregister the figure from pyplot so figures do not accumulate across
	    # requests; the Figure object itself stays usable for rendering.
	    plt.close(fig)

	    salary_result = '<=50K' if prediction[0] == 0 else '>50K'

	    return f"Predicted using {model_used} Salary Class: {salary_result}", fig

	def Health(model, age, sex, bmi, children, smoker, region):
	    """Predict the health-charges class for one person and chart the probabilities.

	    Args:
	        model: Model selector: 0 = SVM, 1 = Logistic Regression,
	            2 = Random Forest.
	        age: Age in years.
	        sex: ``'Male'`` (any letter case) is encoded as 1, anything else as 0.
	        bmi: Body-mass index; the fractional part is kept.
	        children: Number of children.
	        smoker: ``'yes'`` (any letter case) is encoded as 1, anything else as 0.
	        region: Region name, one-hot encoded into ``region_<name>`` when that
	            column exists in ``health_columns``.

	    Returns:
	        A ``(message, figure)`` tuple: the text naming the model used and the
	        predicted charges class, and a matplotlib pie chart of the predicted
	        class probabilities.

	    Raises:
	        ValueError: If ``model`` is not 0, 1 or 2, or if ``sex``, ``smoker``
	            or ``region`` is ``None``.
	    """
	    # (display name, model pickle, scaler pickle) for every selectable model
	    model_files = {
	        0: ("SVM",
	            'best_health_svm_OvM_Charges_Classification.pkl',
	            'z-score_scaler_svm_charges_classification.pkl'),
	        1: ("Logistic Regression",
	            'best_health_lr_Charges_Classification.pkl',
	            'z-score_scaler_lr_charges_classification.pkl'),
	        2: ("Random Forest",
	            'best_rf_Charges_Classification.pkl',
	            'z-score_scaler_rf_charges_classification.pkl'),
	    }
	    if model not in model_files:
	        raise ValueError(f"Unknown model selection: {model!r}")

	    categorical_inputs = {'sex': sex, 'smoker': smoker, 'region': region}
	    missing = [name for name, value in categorical_inputs.items() if value is None]
	    if missing:
	        raise ValueError("Please select a value for: " + ", ".join(missing))

	    model_used, model_file, scaler_file = model_files[model]

	    # Resolve the artefacts relative to this script instead of changing the
	    # process-wide working directory on every request.
	    model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models')

	    # NOTE: pickle.load executes arbitrary code; only load the artefacts
	    # shipped with the application.
	    with open(os.path.join(model_dir, model_file), 'rb') as f:
	        loaded_model = pickle.load(f)
	    with open(os.path.join(model_dir, scaler_file), 'rb') as f:
	        scaler = pickle.load(f)

	    # Maps the predicted class index back to its 'charges' label
	    class_dict = {
	        0: 'Very Low (<= 5000)',
	        1: 'Low (5001 - 10000)',
	        2: 'Moderate (10001 - 15000)',
	        3: 'High (15001 - 20000)',
	        4: 'Very High (> 20000)',
	    }

	    # Start with every training column at 0, then fill in the inputs.
	    row = dict.fromkeys(health_columns, 0)
	    row['age'] = int(age)
	    row['sex'] = int(str(sex).casefold() == 'male')
	    # Keep BMI as a float: casting the whole frame to int used to drop the
	    # fractional part before scaling.
	    row['bmi'] = float(bmi)
	    row['children'] = int(children)
	    # The UI supplies lowercase 'yes'/'no', so compare case-insensitively.
	    row['smoker'] = int(str(smoker).casefold() == 'yes')

	    region_column = f"region_{region}"
	    # Regions without a column in health_columns stay all-zero.
	    if region_column in row:
	        row[region_column] = 1

	    formattedDF = pd.DataFrame([row], columns=health_columns)

	    # Apply the scaler to the unseen data
	    continuous_columns = ['age', 'bmi']
	    formattedDF[continuous_columns] = scaler.transform(formattedDF[continuous_columns])

	    # Make predictions with the loaded model
	    prediction = class_dict[int(loaded_model.predict(formattedDF)[0])]
	    # Probabilities of the single submitted sample
	    probabilities = loaded_model.predict_proba(formattedDF)[0]
	    num_classes = len(probabilities)

	    class_labels = [class_dict[i] for i in range(num_classes)]
	    colors = plt.cm.viridis(np.linspace(0, 1, num_classes))  # one colour per class

	    fig, ax = plt.subplots(figsize=(10, 10))
	    _, _, autotexts = ax.pie(probabilities, colors=colors, autopct='%1.1f%%', startangle=140, pctdistance=1.1)

	    # Legend with one coloured box per class
	    legend_elements = [
	        plt.Rectangle((0, 0), 1, 1, color=color, label=label)
	        for color, label in zip(colors, class_labels)
	    ]
	    ax.legend(handles=legend_elements, loc='upper left')
	    ax.set_title("Predicted Class Probabilities")

	    # Show two-decimal percentages and hide the label of empty wedges.
	    for autotext, p in zip(autotexts, probabilities):
	        prob = float(round(p * 100, 2))
	        autotext.set_text(f"{prob}%" if prob > 0 else '')

	    # Unregister the figure from pyplot so figures do not accumulate across
	    # requests; the Figure object itself stays usable for rendering.
	    plt.close(fig)

	    return f"Predicted using {model_used} Charges Class: {prediction}", fig

	# Model selector choices shared by both tabs:
	# (label shown in the UI, value passed to the prediction callback).
	_MODEL_CHOICES = [
	    ("SVM - Jerome Agius", 0),
	    ("Logistic Regression - Isaac Muscat", 1),
	    ("Random Forest - Kyle Demicoli", 2),
	]


	def _model_dropdown():
	    """Return a new model selector dropdown that defaults to the first model."""
	    return gr.Dropdown(choices=_MODEL_CHOICES, label="Model", value=0)


	def _range_slider(bounds, label, step=1):
	    """Return a slider spanning the ``[minimum, maximum]`` pair in ``bounds``."""
	    return gr.Slider(minimum=bounds[0], maximum=bounds[1], step=step, label=label)


	def _prediction_outputs():
	    """Return new output components: the predicted label and the probability chart."""
	    return [gr.Text(label="Predicted Label"), gr.Plot(label="Predicted Class Probabilities")]


	# interface one: salary classification (input order matches Salary's parameters)
	iface1 = gr.Interface(
	    fn=Salary,
	    inputs=[
	        _model_dropdown(),
	        gr.Dropdown(choices=workclass_options, label="Workclass"),
	        gr.Dropdown(choices=education_option, label="Education"),
	        gr.Dropdown(choices=marital_status_option, label="Marital Status"),
	        gr.Dropdown(choices=occupation_option, label="Occupation"),
	        gr.Dropdown(choices=relationship_option, label="Relationship"),
	        gr.Dropdown(choices=race_option, label="Race"),
	        gr.Dropdown(choices=sex_option, label="Sex"),
	        _range_slider(age, "Age"),
	        _range_slider(capital_gain, "Capital Gain"),
	        _range_slider(capital_loss, "Capital Loss"),
	        _range_slider(hours_per_week, "Hours per Week"),
	    ],
	    outputs=_prediction_outputs(),
	    # The tab offers three models, so the title no longer names only SVM.
	    title="Salary Classification",
	    flagging_mode="never",
	)

	# interface two: health-charges classification (input order matches Health's parameters)
	iface2 = gr.Interface(
	    fn=Health,
	    inputs=[
	        _model_dropdown(),
	        _range_slider(age, "Age"),
	        gr.Dropdown(choices=sex_option, label="Sex"),
	        _range_slider(bmi, "BMI", step=0.1),
	        _range_slider(children_count, "No. of Children"),
	        gr.Dropdown(choices=smoker_option, label="Is Smoker"),
	        gr.Dropdown(choices=region_option, label="Region"),
	    ],
	    outputs=_prediction_outputs(),
	    # The tab offers three models, so the title no longer names only SVM.
	    title="Health Charges Classification",
	    flagging_mode="never",
	)

	demo = gr.TabbedInterface([iface1, iface2], ["Salary Prediction", "Health Charges Prediction"])

	# Run the interface
	demo.launch(share=True)