forked from sejalshitole/NLP-Log-Classification-Project
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclassify.py
More file actions
52 lines (43 loc) · 2.02 KB
/
classify.py
File metadata and controls
52 lines (43 loc) · 2.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from processor_regex import classify_with_regex
from processor_bert import classify_with_bert
from processor_llm import classify_with_llm
def classify(logs):
    """Classify a batch of log records.

    Args:
        logs: Iterable of ``(source, log_msg)`` pairs. Each pair is unpacked
            and handed to ``classify_log``.

    Returns:
        list: One label per input pair, in input order. An empty iterable
        yields an empty list.
    """
    return [classify_log(source, log_msg) for source, log_msg in logs]
def classify_log(source, log_msg):
    """Route a single log message to a classifier chosen by its source.

    Args:
        source: Name of the system that produced the message.
        log_msg: The log message text to classify.

    Returns:
        The label from ``classify_with_llm`` when ``source`` is
        ``"LegacyCRM"``; otherwise the label from ``classify_with_regex``,
        or the result of ``classify_with_bert`` when the regex label is falsy.
    """
    # NOTE(review): the listing this was taken from had lost its indentation;
    # the BERT fallback is assumed to apply only to the non-LegacyCRM branch.
    # Confirm against the original file.
    if source == "LegacyCRM":
        return classify_with_llm(log_msg)

    # Regex first; `or` hands over to BERT only when regex gave a falsy label.
    return classify_with_regex(log_msg) or classify_with_bert(log_msg)
def classify_csv(input_file, output_file="output.csv"):
    """Classify every row of a CSV file and write the labelled rows to a CSV.

    The input is read with ``pandas.read_csv`` and must provide ``source`` and
    ``log_message`` columns. The labels returned by ``classify`` are stored in
    a ``target_label`` column and the whole frame is written out.

    Args:
        input_file: CSV to read; passed straight to ``pandas.read_csv``.
        output_file: Destination passed to ``DataFrame.to_csv``. Defaults to
            ``"output.csv"``, the path that used to be hard-coded, so existing
            single-argument callers behave as before.

    Returns:
        The ``output_file`` value that was written to.

    Raises:
        KeyError: If the ``source`` or ``log_message`` column is missing.
    """
    # Function-scope import: pandas is only needed by this CSV entry point.
    import pandas as pd

    df = pd.read_csv(input_file)

    # Pair each row's source with its message and label the whole batch.
    df["target_label"] = classify(list(zip(df["source"], df["log_message"])))

    # index=False keeps the DataFrame's row index out of the output file.
    df.to_csv(output_file, index=False)
    return output_file
if __name__ == "__main__":
    # Batch run: label the rows of test.csv via classify_csv.
    classify_csv("test.csv")

    # Hand-written (source, message) samples for a quick console check.
    sample_logs = [
        ("ModernCRM", "IP 192.168.133.114 blocked due to potential attack"),
        ("BillingSystem", "User User12345 logged in."),
        ("AnalyticsEngine", "File data_6957.csv uploaded successfully by user User265."),
        ("AnalyticsEngine", "Backup completed successfully."),
        ("ModernHR", "GET /v2/54fadb412c4e40cdbaed9335e4c35a9e/servers/detail HTTP/1.1 RCODE 200 len: 1583 time: 0.1878400"),
        ("ModernHR", "Admin access escalation detected for user 9429"),
        ("LegacyCRM", "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active."),
        ("LegacyCRM", "Invoice generation process aborted for order ID 8910 due to invalid tax calculation module."),
        ("LegacyCRM", "The 'BulkEmailSender' feature is no longer supported. Use 'EmailCampaignManager' for improved functionality."),
        ("LegacyCRM", " The 'ReportGenerator' module will be retired in version 4.0. Please migrate to the 'AdvancedAnalyticsSuite' by Dec 2025"),
    ]
    predicted = classify(sample_logs)

    # Print each message next to the label it received.
    for (_source, message), label in zip(sample_logs, predicted):
        print(message, "->", label)