# app.py -- Hugging Face Space file by RaghuCourage9605
# (commit b7bbf35 "Update app.py", verified; 2.09 kB)
import pickle
import gradio as gr
import re
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from sklearn.linear_model import PassiveAggressiveClassifier
# Ensure the required spaCy model is installed
import subprocess
import sys

# spaCy itself is already imported at the top of this file, so re-installing
# it here cannot help: a missing package would have failed at that import.
# Only the language model may be absent, and it is downloaded just once
# instead of on every start-up.
if not spacy.util.is_package("en_core_web_sm"):
    subprocess.check_call(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
    )
def _load_pickle(path):
    """Open *path* in binary mode and return the unpickled object."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Saved vectorizer and saved PAC model, read from the working directory.
tfidf_vectorizer = _load_pickle('tfidf_vectorizer.pkl')
pac_model = _load_pickle('pac_model.pkl')

# spaCy language model (not referenced by the functions visible in this file).
nlp = spacy.load("en_core_web_sm")
class TextPreprocessing:
    """Text-cleaning helpers."""

    @staticmethod
    def Cleaning_text(text: str) -> str:
        """
        Reduce raw text to lowercase letters a-z separated by single spaces.

        Steps, in order: lowercase; remove URLs; expand a trailing ``n't``
        to `` not``; drop a trailing possessive ``'s``; remove every
        character that is not a letter or whitespace; collapse whitespace
        runs and trim both ends.

        Args:
            text: Raw input text.

        Returns:
            The cleaned text, or an empty string if no letters remain.

        NOTE(review): the contraction and possessive steps used to run after
        the special-character strip and therefore never matched. If the
        pickled vectorizer was fitted on text cleaned with that old order,
        re-fit it (or confirm the impact) so training and inference agree.
        """
        text = text.lower()
        text = re.sub(r'http\S+|www\S+|https\S+', '', text)
        # These two must run while the apostrophe is still present. The
        # trailing \b stops an opening quote such as 'stop' from losing its s.
        text = re.sub(r"n't\b", ' not', text)
        text = re.sub(r"'s\b", '', text)
        text = re.sub(r"[^a-zA-Z\s]", '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text
def preprocess_text(text):
    """
    Return *text* after it has been passed through
    ``TextPreprocessing.Cleaning_text``.
    """
    return TextPreprocessing.Cleaning_text(text)
def predict_news(text):
    """
    Predict whether the input news text is real or fake.

    The text is cleaned with ``preprocess_text``, vectorised with the loaded
    ``tfidf_vectorizer`` and passed to ``pac_model.predict``.

    Args:
        text: Raw article text; may be ``None`` or blank.

    Returns:
        ``"Fake News"`` when the predicted label equals 0, ``"Real News"``
        for any other label, or a short prompt when there is no text to
        classify.
    """
    no_input_message = "Please enter some news text to classify."

    # Guard first: None has no string methods, and blank text carries no
    # information for the model.
    if text is None or not text.strip():
        return no_input_message

    cleaned_text = preprocess_text(text)
    if not cleaned_text:
        # Cleaning removed everything, so there is nothing to vectorise.
        return no_input_message

    X_input = tfidf_vectorizer.transform([cleaned_text])
    # One row went in, so take the single label explicitly instead of
    # relying on the truth value of a one-element array comparison.
    label = pac_model.predict(X_input)[0]
    return "Fake News" if label == 0 else "Real News"
# Gradio Interface: a 7-line text box in, the predict_news result out as text.
iface = gr.Interface(
    fn=predict_news,
    inputs=gr.Textbox(lines=7, placeholder="Enter the news article here..."),
    outputs="text",
    title="Fake News Classification",
    description="Classify news articles as real or fake using a Passive Aggressive Classifier."
)

# Start the server only when this file is executed as a script, so importing
# the module (for example to reuse predict_news) does not launch the app.
if __name__ == "__main__":
    iface.launch()