-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprediction.py
More file actions
131 lines (99 loc) · 4.85 KB
/
prediction.py
File metadata and controls
131 lines (99 loc) · 4.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split # pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
import settings
import os
import datetime
# Load the raw log. Column names are supplied explicitly and the parsed
# timestamp column becomes the index.
state_columns = ['residents', 'holiday', 'temperature']
df = pd.read_csv(
    settings.DATA_URL,
    names=['timestamp'] + state_columns,
    parse_dates=['timestamp'],
).set_index('timestamp')

# Bring the data onto a regular 30-minute grid, keeping the last entry
# that falls into each interval.
half_hour = df.resample('30min').last()

# Carry the previous state forward into intervals that have no log entry.
half_hour[state_columns] = half_hour[state_columns].ffill()
# Build one training row per calendar day.
# Features: weekday, month, week-number parity, holiday flag and the mean
# number of residents during the previous evening (18:00-22:00).
# Target: the residents values of the day's half-hour slots from 06:00 to 22:00.
expected_slots = 33  # half-hour slots from 06:00 to 22:00, both ends included
one_day = datetime.timedelta(days=1)

# Group the evening window by date once, so that each day can look up the
# previous evening directly instead of re-scanning the whole index on every
# loop iteration.
evening_residents = half_hour['residents'].between_time('18:00', '22:00')
evening_by_date = {
    evening_date: values
    for evening_date, values in evening_residents.groupby(evening_residents.index.date)
}

days = []
for day, group in half_hour.groupby(half_hour.index.date):
    day_slice = group.between_time('06:00', '22:00')
    # A date only appears in the mapping when it has at least one evening row.
    previous_evening = evening_by_date.get(day - one_day)

    # Skip days without a complete 06:00-22:00 window or without any data
    # from the evening before.
    if len(day_slice) != expected_slots or previous_evening is None:
        continue

    first_slot = day_slice.index[0]
    days.append({
        'date': day,
        'weekday': first_slot.weekday(),
        'month': first_slot.month,
        'week_parity': first_slot.week % 2,
        'holiday': int(day_slice['holiday'].iloc[0]),
        'yesterday_evening_state': previous_evening.mean(),
        'target': day_slice['residents'].values,
    })

daily = pd.DataFrame(days)
# Feature matrix: one row per day. Target matrix: one row per day with one
# column per entry of that day's 'target' array.
feature_columns = [
    'weekday',
    'month',
    'week_parity',
    'holiday',
    'yesterday_evening_state',
]
X = daily[feature_columns]
Y = np.vstack(daily['target'].tolist())

# Fit on ALL available days: this model serves the real prediction, so no
# share of the data is held back.
base_estimator = RandomForestRegressor()
model = MultiOutputRegressor(base_estimator)
model.fit(X, Y)
# Real prediction: forecast the day that follows the most recent training day.
# The last row of `daily` is treated as "yesterday"; the day after it is "today".
yesterday_date = daily['date'].iloc[-1]
today = yesterday_date + pd.Timedelta(days=1)

# The holiday flag for today is read from the first raw log entry of that day
# and falls back to 0 when there is no entry for today.
today_data = df[df.index.date == today]
holiday_today = int(today_data['holiday'].iloc[0]) if len(today_data) > 0 else 0

# Compute the evening feature from the resampled, forward-filled frame, which is
# the same source the training rows are built from. Using the raw log here would
# average over irregularly spaced entries (or yield NaN when no entry falls
# into the window) and feed the model a feature it was not trained on.
yesterday_evening_state = (
    half_hour.loc[half_hour.index.date == yesterday_date]
    .between_time('18:00', '22:00')['residents']
    .mean()
)

today_features = {
    'weekday': today.weekday(),
    'month': today.month,
    'week_parity': today.isocalendar().week % 2,
    'holiday': holiday_today,  # holiday info from today's data (3rd column)
    'yesterday_evening_state': yesterday_evening_state
}

# Predict today's values; the model returns one row per input row.
today_X = pd.DataFrame([today_features])
prediction_today = model.predict(today_X)[0]

print(f"Prediction for {today.strftime('%Y-%m-%d')} (today):")
print(f"Based on yesterday evening average of residents (18:00–22:00): {today_features['yesterday_evening_state']:.2f} residents")
# The model was trained with all available data for real predictions.
print(f"Model trained with {len(X)} days of historical data")
print(f"Last available day: {yesterday_date.strftime('%Y-%m-%d')}")
# Turn today's prediction into a schedule file: one line per half-hour slot
# starting at 06:00, holding the slot time, the predicted number of residents
# and the target temperature chosen for that slot.
targetTemps = settings.targetTemps
lower_threshold = settings.switchingThresholds[0]
upper_threshold = settings.switchingThresholds[1]

# Slot labels "06:00", "06:30", ... - one per predicted value.
times = (
    pd.date_range("06:00", periods=len(prediction_today), freq="30min")
    .strftime("%H:%M")
    .tolist()
)
predicted_residentsAtHome = list(prediction_today)

# The two switching thresholds select one of three target temperatures.
targetTemp = []
for residents in predicted_residentsAtHome:
    if residents < lower_threshold:
        targetTemp.append(targetTemps[0])
    elif residents < upper_threshold:
        targetTemp.append(targetTemps[1])
    else:
        targetTemp.append(targetTemps[2])

# Make sure the output directory exists so the write cannot fail on a fresh
# checkout; the `with` block closes the file, no explicit close() is needed.
output_dir = os.path.join(os.path.dirname(__file__), 'data')
os.makedirs(output_dir, exist_ok=True)
with open(os.path.join(output_dir, settings.PREDICTION_FILENAME), "w") as file:
    file.writelines(
        label + settings.SEPARATOR + str(residents) + settings.SEPARATOR + str(temperature) + "\n"
        for label, residents, temperature in zip(times, predicted_residentsAtHome, targetTemp)
    )