File size: 2,365 Bytes
cb633b7
ab7c90b
f8463d0
ab7c90b
1c7cea2
ab7c90b
1c7cea2
 
 
ab7c90b
1c7cea2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d00e437
1c7cea2
 
 
 
 
 
b54a101
 
 
 
 
 
1c7cea2
f8463d0
ab7c90b
f8463d0
 
ab7c90b
1c7cea2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8463d0
 
 
 
 
 
 
 
 
0cc4532
f8463d0
 
 
 
d00e437
f8463d0
1c7cea2
f8463d0
1c7cea2
 
ab7c90b
e5aa6a6
f8463d0
 
 
 
 
1c7cea2
 
cb633b7
 
f8463d0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Prefer GPU when available; both models and all inputs are moved here.
device = "cuda" if torch.cuda.is_available() else "cpu"

# ======================
# Load MODELS
# ======================

# Primary detector: RoBERTa fine-tuned by OpenAI to flag GPT-2 output.
ROBERTA_MODEL = "roberta-base-openai-detector"
# Auxiliary model: a base (not fine-tuned) DistilRoBERTa checkpoint.
DISTIL_MODEL = "distilroberta-base"

# RoBERTa (AI Detector)
roberta_tokenizer = AutoTokenizer.from_pretrained(ROBERTA_MODEL)
roberta_model = AutoModelForSequenceClassification.from_pretrained(ROBERTA_MODEL).to(device)
roberta_model.eval()  # inference mode: disables dropout etc.

# DistilRoBERTa (Auxiliary signal)
# NOTE(review): "distilroberta-base" is a pretrained language model with no
# task-specific head; passing num_labels=2 attaches a randomly initialized
# classification head, so this model's votes are effectively untrained —
# confirm this is intentional or swap in a fine-tuned detector checkpoint.
distil_tokenizer = AutoTokenizer.from_pretrained(DISTIL_MODEL)
distil_model = AutoModelForSequenceClassification.from_pretrained(
    DISTIL_MODEL,
    num_labels=2
).to(device)
distil_model.eval()  # inference mode


# ======================
# Prediction function
# ======================

def get_probs(tokenizer, model, text):
    """Tokenize *text*, run it through *model*, and return the class
    probabilities for the single input as a 1-D tensor on the CPU.

    Input is truncated/padded to at most 512 tokens; softmax is taken
    over the logits of the first (only) batch item.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    encoded = encoded.to(device)

    # Inference only — no gradient bookkeeping needed.
    with torch.no_grad():
        logits = model(**encoded).logits
        probabilities = torch.softmax(logits, dim=1)[0]

    return probabilities.cpu()


def detect_text(text):
    """Classify *text* as AI-generated or human-written.

    Returns a pair: a Markdown verdict string and a dict mapping
    "Human"/"AI" to ensemble probabilities rounded to 4 decimals
    (or an error message and None for blank input).
    """
    if not text.strip():
        return "Please enter some text.", None

    # Soft-voting ensemble: average per-class probabilities of both models.
    probs_roberta = get_probs(roberta_tokenizer, roberta_model, text)
    probs_distil = get_probs(distil_tokenizer, distil_model, text)
    averaged = (probs_roberta + probs_distil) / 2

    # Index 0 is treated as "human", index 1 as "AI" for BOTH models.
    # NOTE(review): label order must match across the two checkpoints, and
    # the distilroberta head is not fine-tuned for this task — confirm.
    human_prob = averaged[0].item()
    ai_prob = averaged[1].item()

    is_ai = ai_prob > human_prob
    label = "🤖 **AI Generated**" if is_ai else "🧑 **Human Written**"
    confidence = ai_prob if is_ai else human_prob

    message = f"{label}\n\nConfidence: **{confidence*100:.2f}%**"
    scores = {
        "Human": round(human_prob, 4),
        "AI": round(ai_prob, 4),
    }
    return message, scores


# ======================
# Gradio UI
# ======================

# Single-page interface: one multiline textbox in; a Markdown verdict and a
# per-class probability widget out (mirrors detect_text's 2-tuple return).
demo = gr.Interface(
    fn=detect_text,
    inputs=gr.Textbox(lines=8, placeholder="Paste your text here..."),
    outputs=[
        gr.Markdown(label="Result"),
        gr.Label(label="Probabilities")
    ],
    title="AI Text Detector (Ensemble Model)",
    description="Ensemble of RoBERTa + DistilRoBERTa using soft voting."
)

# Starts the local web server; this call blocks until shutdown.
demo.launch()