|
import pickle |
|
import gradio as gr |
|
import re |
|
import spacy |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
import numpy as np |
|
from sklearn.linear_model import PassiveAggressiveClassifier |
|
|
|
|
|
import importlib.util
import subprocess
import sys

# Install runtime dependencies only when they are actually missing.
# Running pip and the model download unconditionally on every start-up is
# slow and makes the app fail without network access even when everything
# is already installed.
# NOTE: `import spacy` at the top of this file runs before this point, so
# the spaCy check below is only a safety net.
if importlib.util.find_spec("spacy") is None:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "spacy"])
    # Let the import system notice the freshly installed package.
    importlib.invalidate_caches()

if importlib.util.find_spec("en_core_web_sm") is None:
    subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
    # Let the import system notice the freshly installed model package.
    importlib.invalidate_caches()
|
|
|
|
|
def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at *path*."""
    # NOTE: unpickling can execute arbitrary code; only load artifacts that
    # come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Vectorizer and classifier used by predict_news (paths are relative to the
# current working directory).
tfidf_vectorizer = _load_pickle('tfidf_vectorizer.pkl')
pac_model = _load_pickle('pac_model.pkl')

# spaCy English pipeline.
# NOTE(review): `nlp` is not referenced anywhere in the visible part of this
# file; confirm whether it is still needed.
nlp = spacy.load("en_core_web_sm")
|
|
|
class TextPreprocessing:
    """Namespace for the text normalisation helpers."""

    # (pattern, replacement) pairs applied in this exact order by
    # Cleaning_text: URLs, non-letter characters, "n't", "'s", whitespace runs.
    #
    # NOTE(review): the non-letter strip runs before the "n't" and "'s"
    # rules, so apostrophes are already gone and those two rules never
    # match ("don't" becomes "dont", not "do not"). The order is kept as-is
    # to preserve output exactly; reordering would change the tokens fed to
    # the pickled vectorizer, so confirm how it was fitted before changing it.
    _SUBSTITUTIONS = (
        (r'http\S+|www\S+|https\S+', ''),
        (r"[^a-zA-Z\s]", ''),
        (r"n't", ' not'),
        (r"'s", ''),
        (r'\s+', ' '),
    )

    @staticmethod
    def Cleaning_text(text: str) -> str:
        """Return a normalised copy of *text*.

        The text is lower-cased, URLs and every character that is not an
        ASCII letter or whitespace are removed, runs of whitespace are
        collapsed to a single space, and leading/trailing whitespace is
        stripped.
        """
        cleaned = text.lower()
        for pattern, replacement in TextPreprocessing._SUBSTITUTIONS:
            cleaned = re.sub(pattern, replacement, cleaned)
        return cleaned.strip()
|
|
|
|
|
def preprocess_text(text):
    """Return *text* after normalisation by ``TextPreprocessing.Cleaning_text``."""
    return TextPreprocessing.Cleaning_text(text)
|
|
|
|
|
def predict_news(text):
    """Classify a news article as fake or real.

    Args:
        text: Raw article text from the UI. May be ``None`` or empty.

    Returns:
        ``"Fake News"`` when the model predicts label 0, ``"Real News"``
        for any other label, or a short prompt message when the input has
        no usable text.
    """
    # Guard against None/empty input before cleaning: the cleaner calls
    # ``.lower()`` and would raise on None.
    cleaned_text = preprocess_text(text) if text else ""

    # Input with no letters left after cleaning would be vectorized to an
    # all-zero row, and the resulting label would be meaningless.
    if not cleaned_text:
        return "Please enter some news text to classify."

    X_input = tfidf_vectorizer.transform([cleaned_text])
    prediction = pac_model.predict(X_input)

    # predict() returns one label per input row; read the single label
    # explicitly rather than relying on the truth value of an array.
    return "Fake News" if prediction[0] == 0 else "Real News"
|
|
|
|
|
# Gradio UI: a multi-line text box in, the predicted label out as plain text.
iface = gr.Interface(
    fn=predict_news,
    inputs=gr.Textbox(lines=7, placeholder="Enter the news article here..."),
    outputs="text",
    title="Fake News Classification",
    description="Classify news articles as real or fake using a Passive Aggressive Classifier."
)

# Start the web server only when this file is run as a script, so importing
# the module (e.g. to reuse predict_news) does not launch the server.
if __name__ == "__main__":
    iface.launch()