import pickle
import re
import subprocess
import sys

import gradio as gr
import numpy as np
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier

# Load the saved TF-IDF vectorizer and Passive Aggressive Classifier model
with open('tfidf_vectorizer.pkl', 'rb') as vectorizer_file:
    tfidf_vectorizer = pickle.load(vectorizer_file)

with open('pac_model.pkl', 'rb') as model_file:  # Updated to PAC model
    pac_model = pickle.load(model_file)

# Load the Spacy language model, downloading it first if it is not yet installed
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
    nlp = spacy.load("en_core_web_sm")


class TextPreprocessing:
    @staticmethod
    def Cleaning_text(text: str) -> str:
        """
        Clean the input text: lowercase it, remove URLs, expand common
        contractions, strip special characters, and collapse extra whitespace.
        """
        text = text.lower()
        text = re.sub(r'http\S+|www\S+|https\S+', '', text)
        # Expand contractions before stripping punctuation so the apostrophes still match
        text = re.sub(r"n't", ' not', text)
        text = re.sub(r"'s", '', text)
        text = re.sub(r"[^a-zA-Z\s]", '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text


def preprocess_text(text):
    """
    Preprocess the text by cleaning it with the TextPreprocessing class.
    """
    return TextPreprocessing.Cleaning_text(text)


def predict_news(text):
    """
    Predict whether the input news text is real or fake.
    """
    cleaned_text = preprocess_text(text)
    X_input = tfidf_vectorizer.transform([cleaned_text])
    prediction = pac_model.predict(X_input)[0]
    return "Fake News" if prediction == 0 else "Real News"


# Gradio interface
iface = gr.Interface(
    fn=predict_news,
    inputs=gr.Textbox(lines=7, placeholder="Enter the news article here..."),
    outputs="text",
    title="Fake News Classification",
    description="Classify news articles as real or fake using a Passive Aggressive Classifier."
)

iface.launch()