# This code is adapted from the official Kaggle Benchmark Kernel
# https://www.kaggle.com/dborkan/benchmark-kernel#Create-a-text-tokenizer
import numpy as np
import pandas as pd
from sklearn import metrics

import config as cfg

SUBGROUP_AUC = 'subgroup_auc'
BPSN_AUC = 'bpsn_auc'  # stands for background positive, subgroup negative
BNSP_AUC = 'bnsp_auc'  # stands for background negative, subgroup positive

TOXICITY_COLUMN = cfg.toxicity_column
TEXT_COLUMN = cfg.text_column


def compute_auc(y_true, y_pred):
    """Returns the ROC AUC, or NaN when y_true contains only one class."""
    try:
        return metrics.roc_auc_score(y_true, y_pred)
    except ValueError:
        return np.nan


def compute_subgroup_auc(df, subgroup, label, model_name):
    """Computes the AUC restricted to examples that belong to the subgroup."""
    subgroup_examples = df[df[subgroup]]
    return compute_auc(subgroup_examples[label], subgroup_examples[model_name])


def compute_bpsn_auc(df, subgroup, label, model_name):
    """Computes the AUC of the within-subgroup negative examples and the background positive examples."""
    subgroup_negative_examples = df[df[subgroup] & ~df[label]]
    non_subgroup_positive_examples = df[~df[subgroup] & df[label]]
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    examples = pd.concat([subgroup_negative_examples, non_subgroup_positive_examples])
    return compute_auc(examples[label], examples[model_name])


def compute_bnsp_auc(df, subgroup, label, model_name):
    """Computes the AUC of the within-subgroup positive examples and the background negative examples."""
    subgroup_positive_examples = df[df[subgroup] & df[label]]
    non_subgroup_negative_examples = df[~df[subgroup] & ~df[label]]
    examples = pd.concat([subgroup_positive_examples, non_subgroup_negative_examples])
    return compute_auc(examples[label], examples[model_name])


def compute_bias_metrics_for_model(dataset, subgroups, model, label_col):
    """Computes per-subgroup metrics for all subgroups and one model."""
    records = []
    for subgroup in subgroups:
        record = {
            'subgroup': subgroup,
            'subgroup_size': len(dataset[dataset[subgroup]]),
            SUBGROUP_AUC: compute_subgroup_auc(dataset, subgroup, label_col, model),
            BPSN_AUC: compute_bpsn_auc(dataset, subgroup, label_col, model),
            BNSP_AUC: compute_bnsp_auc(dataset, subgroup, label_col, model),
        }
        records.append(record)
    return pd.DataFrame(records).sort_values(SUBGROUP_AUC, ascending=True)


def calculate_overall_auc(df, model_name):
    """Computes the ROC AUC over the full dataset."""
    true_labels = df[TOXICITY_COLUMN]
    predicted_labels = df[model_name]
    return metrics.roc_auc_score(true_labels, predicted_labels)


def power_mean(series, p):
    """Computes the generalized (power) mean; a negative p weights low scores more heavily."""
    total = sum(np.power(series, p))
    return np.power(total / len(series), 1 / p)
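

# Worked example (assumed values, not from the benchmark): with p = -5,
# power_mean([0.9, 0.5], -5) ≈ 0.57, far closer to the worst score (0.5) than
# the arithmetic mean (0.7). The negative exponent therefore penalizes models
# whose weakest subgroup AUCs lag behind the rest.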


def get_final_metric(bias_df, overall_auc, POWER=-5, OVERALL_MODEL_WEIGHT=0.25):
    """Combines the overall AUC with the power means of the three bias AUCs."""
    bias_score = np.average([
        power_mean(bias_df[SUBGROUP_AUC], POWER),
        power_mean(bias_df[BPSN_AUC], POWER),
        power_mean(bias_df[BNSP_AUC], POWER),
    ])
    return (OVERALL_MODEL_WEIGHT * overall_auc) + ((1 - OVERALL_MODEL_WEIGHT) * bias_score)
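

# Usage sketch: wires the helpers together end to end on synthetic data.
# Everything below is a hypothetical example; the column names 'label',
# 'female', and 'model' are illustrative and not defined by this module.
if __name__ == '__main__':
    # Subgroup and label columns must be boolean for the masks above to work.
    rng = np.random.default_rng(0)
    n = 200
    demo = pd.DataFrame({
        'label': rng.random(n) > 0.7,   # boolean toxicity labels
        'female': rng.random(n) > 0.8,  # boolean subgroup membership
        'model': rng.random(n),         # model scores in [0, 1]
    })
    bias_df = compute_bias_metrics_for_model(demo, ['female'], 'model', 'label')
    overall = compute_auc(demo['label'], demo['model'])
    print(bias_df)
    print('final metric:', get_final_metric(bias_df, overall))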