# Source: phi_binary_classification/comparison_graphs_LLM.py (branch main),
# GitHub repository barek2k2/phi_binary_classification.
import matplotlib.pyplot as plt
import pandas as pd

# Hand-recorded evaluation metrics for three models over 16 steps.
# "Accuracy" is stored in percent (0-100); "Precision", "Recall" and
# "F1 Score" are stored as fractions (0-1).

# BERT-base
data_bert = {
    "Step": range(1, 17),
    "Accuracy": [
        36.78, 63.21, 63.21, 63.21, 63.21, 63.21, 36.78, 63.32,
        97.72, 99.31, 99.31, 99.72, 99.65, 99.65, 99.54, 83.15,
    ],
    "Precision": [
        0, 0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.0, 0.6331,
        0.9908, 0.9964, 0.9964, 1.000, 1.000, 1.000, 0.9964, 0.8342,
    ],
    "Recall": [
        0, 1.000, 1.000, 1.000, 1.000, 1.000, 0.0, 0.9982,
        0.9730, 0.9928, 0.9928, 0.9964, 0.9946, 0.9946, 0.9946, 0.9153,
    ],
    "F1 Score": [
        0, 0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.0, 0.7748,
        0.9818, 0.9946, 0.9946, 0.9982, 0.9973, 0.9973, 0.9946, 0.8729,
    ],
}

# RoBERTa-base
# NOTE(review): step 1 pairs accuracy 36.78 with precision 0.6321 / recall 1.000,
# while every other 36.78 row in this file has precision and recall of 0 and
# every other 0.6321 / 1.000 row has accuracy 63.21 -- possibly a transcription
# slip; confirm against the original training log.
data_roberta = {
    "Step": range(1, 17),
    "Accuracy": [
        36.78, 63.21, 63.21, 63.21, 63.21, 63.21, 36.78, 63.21,
        63.21, 94.87, 96.01, 97.26, 97.49, 95.44, 95.67, 63.21,
    ],
    "Precision": [
        0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.0, 0.6321,
        0.6321, 0.9554, 0.9610, 0.9836, 0.9734, 0.9558, 0.9709, 0.6321,
    ],
    "Recall": [
        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 0.0, 1.000,
        1.000, 0.9640, 0.9766, 0.9730, 0.9874, 0.9730, 0.9604, 1.000,
    ],
    "F1 Score": [
        0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.0, 0.7746,
        0.7746, 0.9596, 0.9687, 0.9783, 0.9803, 0.9643, 0.9656, 0.7746,
    ],
}

# RoBERTa-large
data_roberta_large = {
    "Step": range(1, 17),
    "Accuracy": [
        36.78, 63.21, 63.21, 63.21, 63.21, 63.21, 63.21, 63.21,
        63.21, 98.06, 98.40, 99.43, 99.20, 98.74, 98.17, 89.86,
    ],
    "Precision": [
        0.0, 0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.6321, 0.6321,
        0.6321, 0.9804, 0.9856, 0.9946, 0.9910, 0.9875, 0.9856, 0.8896,
    ],
    "Recall": [
        0.0, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000,
        1.000, 0.9892, 0.9892, 0.9964, 0.9964, 0.9928, 0.9856, 0.9586,
    ],
    "F1 Score": [
        0.0, 0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.7746, 0.7746,
        0.7746, 0.9848, 0.9874, 0.9955, 0.9937, 0.9901, 0.9856, 0.9228,
    ],
}

# One DataFrame per model, columns in the dictionaries' key order
df_bert, df_roberta, df_roberta_large = (
    pd.DataFrame(metrics)
    for metrics in (data_bert, data_roberta, data_roberta_large)
)

# Plot 1: accuracy (percent) per step, one solid line per model.
# Each entry: (DataFrame, legend label, per-model styling). BERT is drawn
# opaque with filled markers; the RoBERTa variants are semi-transparent with
# white marker faces.
_accuracy_series = (
    (df_bert, "BERT Accuracy", {"color": 'green'}),
    (df_roberta, "RoBERTa Accuracy", {"alpha": 0.7, "color": 'orange', "markerfacecolor": 'white'}),
    (df_roberta_large, "RoBERTa-large Accuracy", {"alpha": 0.7, "color": 'blue', "markerfacecolor": 'white'}),
)

plt.figure(figsize=(12, 6))
for _frame, _legend_label, _look in _accuracy_series:
    plt.plot(
        _frame["Step"],
        _frame["Accuracy"],
        marker='o',
        label=_legend_label,
        linestyle='-',
        linewidth=2,
        **_look,
    )

plt.xlabel("Step")
plt.ylabel("Accuracy (%)")
plt.title("BERT-base vs RoBERTa-base vs RoBERTa-large: Accuracy Comparison")
plt.legend()
plt.grid(True)
plt.show()

# Plot 2: precision, recall and F1 score per step for all three models.
# Colour identifies the model; marker and line style identify the metric.
# Curves are drawn model by model (precision, recall, F1 for each) so the
# legend lists the three metrics of one model together.
_model_styles = (
    # (DataFrame, legend prefix, per-model styling)
    (df_bert, "BERT", {"color": 'green'}),
    (df_roberta, "RoBERTa", {"alpha": 0.7, "color": 'orange', "markerfacecolor": 'white'}),
    (df_roberta_large, "RoBERTa-large", {"alpha": 0.7, "color": 'blue', "markerfacecolor": 'white'}),
)
_metric_styles = (
    # (column name, marker, line style)
    ("Precision", 's', '--'),
    ("Recall", '^', '-.'),
    ("F1 Score", 'd', ':'),
)

plt.figure(figsize=(12, 6))
for _frame, _model_name, _look in _model_styles:
    for _metric, _marker, _line in _metric_styles:
        plt.plot(
            _frame["Step"],
            _frame[_metric],
            marker=_marker,
            label=f"{_model_name} {_metric}",
            linestyle=_line,
            linewidth=2,
            **_look,
        )

plt.xlabel("Step")
# These three metrics are stored as fractions in [0, 1] (unlike accuracy,
# which is stored in percent), so the axis must not be labelled "%".
plt.ylabel("Score (0-1)")
plt.title("BERT-base vs RoBERTa-base vs RoBERTa-large: Precision, Recall, and F1 Score Comparison")
plt.legend()
plt.grid(True)
plt.show()