-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess_eval_functions.py
More file actions
66 lines (54 loc) · 1.92 KB
/
process_eval_functions.py
File metadata and controls
66 lines (54 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 25 21:24:10 2025
@author: luisfernando
"""
import os
import re
import pandas as pd
# Discover every evaluation report in the working directory at import time.
evaluation_files = [
    name
    for name in os.listdir()
    if "model_evaluation" in name and name.endswith(".txt")
]
def extract_metrics_from_file(file_name):
    """Parse one model-evaluation report and return its metric records.

    Parameters
    ----------
    file_name : str
        Path to a plain-text report containing one or more
        "Evaluating Feature Set" blocks.

    Returns
    -------
    list[dict]
        One dict per evaluation block, with keys "File Name",
        "Feature Set", "Feature Details", "Target", "Model",
        "MSE", "RMSE", "MAE" and "R-squared" (metrics as floats).

    Raises
    ------
    ValueError
        If a captured metric field is not parseable as a float.
    """
    # utf-8 must be explicit: the report contains the non-ASCII "²"
    # (in "R-squared (R²)"), which mis-decodes under legacy platform
    # defaults such as cp1252 on Windows and then never matches the regex.
    with open(file_name, 'r', encoding='utf-8') as file:
        content = file.read()
    # One match per evaluation block; capture groups are, in order:
    # feature set, feature details, target, model, MSE, RMSE, MAE, R².
    pattern = (
        r"--------------------------------------------------\n"
        r"Evaluating Feature Set: (.*?) \((.*?)\)\n"
        r"Target: (.*?)\n"
        r"Model: (.*?)\n\n"
        r".*?Metrics.*?:\n"
        r"Mean Squared Error \(MSE\): (.*?)\n"
        r"Root Mean Squared Error \(RMSE\): (.*?)\n"
        r"Mean Absolute Error \(MAE\): (.*?)\n"
        r"R-squared \(R²\): (.*?)\n"
    )
    # DOTALL lets the ".*?Metrics.*?:" bridge span intermediate lines
    # (e.g. a heading between the model line and the metrics section).
    matches = re.findall(pattern, content, re.DOTALL)
    # Organize each match into a flat record suitable for a DataFrame row.
    data = []
    for match in matches:
        feature_set, feature_details, target, model, mse, rmse, mae, r2 = match
        data.append({
            "File Name": file_name,
            "Feature Set": feature_set.strip(),
            "Feature Details": feature_details.strip(),
            "Target": target.strip(),
            "Model": model.strip(),
            "MSE": float(mse.strip()),
            "RMSE": float(rmse.strip()),
            "MAE": float(mae.strip()),
            "R-squared": float(r2.strip()),
        })
    return data
# Flatten the records from every discovered report into one list of rows.
all_data = [
    record
    for report_name in evaluation_files
    for record in extract_metrics_from_file(report_name)
]

# Tabulate the combined records and persist them as a comparison CSV.
df = pd.DataFrame(all_data)
output_file = "model_metrics_comparison.csv"
df.to_csv(output_file, index=False)
print(f"Metrics comparison saved to {output_file}")