-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrealtime_training.py
More file actions
49 lines (39 loc) · 1.33 KB
/
realtime_training.py
File metadata and controls
49 lines (39 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import pickle
import sqlite3
import numpy as np
import os
# import HashingVectorizer from local dir
from vectorizer import vect
def update_model(db_path, model, batch_size=10000):
    """Update the movie classification model with reviews entered by users.

    Every row of the ``review_db`` table is read in batches; each batch is
    vectorized with the module-level ``vect`` and passed to
    ``model.partial_fit``.

    :param db_path: string
        Path to the SQLite database
    :param model: model
        SGDClassifier (only its ``partial_fit`` method is used here)
    :param batch_size: int
        Maximum number of rows fetched from the database and trained on
        per ``partial_fit`` call
    :return: the same ``model`` object after incremental training
    """
    # The label set never changes between batches, so build it once.
    classes = np.array([0, 1])

    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute('SELECT * from review_db')
        results = c.fetchmany(batch_size)
        while results:
            data = np.array(results)
            # First selected column is used as the text, second as the label.
            X = data[:, 0]
            y = data[:, 1].astype(int)
            X_train = vect.transform(X)
            model.partial_fit(X_train, y, classes=classes)
            results = c.fetchmany(batch_size)
    finally:
        # Release the database handle even if the query or training raises.
        conn.close()
    return model
# Locate the pickled classifier and the review database next to this script.
cur_dir = os.path.dirname(__file__)
clf_path = os.path.join(cur_dir, 'pkl_objects', 'classifier.pkl')

# NOTE: unpickling can execute arbitrary code; only load a classifier file
# that this project produced itself.
with open(clf_path, 'rb') as clf_file:
    clf = pickle.load(clf_file)

db = os.path.join(cur_dir, 'reviews.sqlite')
clf = update_model(db_path=db, model=clf, batch_size=10000)

# Serialize back newly trained model; the with-block guarantees the file is
# flushed and closed before the script exits.
with open(clf_path, 'wb') as clf_file:
    pickle.dump(clf, clf_file, protocol=4)