text_classification_attention_sentence_level.py
import os
os.environ['KERAS_BACKEND'] = 'theano'

from keras import backend as K
from keras import initializers
from keras.engine.topology import Layer
from keras.layers import LSTM, Bidirectional, Dense, TimeDistributed
from keras.layers import Embedding, Input
from keras.models import Model

# data_3d provides documents shaped (num_sent, sent_len), which is what
# this hierarchical model consumes.
from data_3d import load_preprocessed_data
SENT_LEN = 50    # maximum words per sentence
NUM_SENT = 20    # maximum sentences per document
EMBD_DIM = 100   # word embedding dimension
INP_DIM = 22000  # vocabulary size
class AttLayer(Layer):
    """Soft attention over timesteps: learns one score per timestep and
    returns the attention-weighted sum of the inputs."""

    def __init__(self, **kwargs):
        self.init = initializers.get('normal')
        super(AttLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Expects 3D input: (batch, timesteps, features).
        assert len(input_shape) == 3
        self.W = K.variable(self.init((input_shape[-1],)))
        self.trainable_weights = [self.W]
        super(AttLayer, self).build(input_shape)

    def call(self, x, mask=None):
        # Softmax over the timestep axis of tanh(x . W).
        # Note: .dimshuffle is Theano-specific, hence the backend set above.
        eij = K.tanh(K.dot(x, self.W))
        ai = K.exp(eij)
        weights = ai / K.sum(ai, axis=1).dimshuffle(0, 'x')
        weighted_input = x * weights.dimshuffle(0, 1, 'x')
        return weighted_input.sum(axis=1)

    def compute_output_shape(self, input_shape):
        # The timestep axis is summed out: (batch, features).
        return (input_shape[0], input_shape[-1])
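
# A minimal NumPy reference for what AttLayer computes (a hypothetical
# helper added for illustration, not part of the original script),
# assuming x has shape (batch, timesteps, features) and w has shape
# (features,):
def _attention_reference(x, w):
    import numpy as np
    eij = np.tanh(np.dot(x, w))                   # (batch, timesteps)
    ai = np.exp(eij)
    weights = ai / ai.sum(axis=1, keepdims=True)  # softmax over timesteps
    return (x * weights[:, :, None]).sum(axis=1)  # (batch, features)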
if __name__ == "__main__":
# This is two level, first sentence should be encoded,
# then this encoded info should be used to further encode document
# (made of sentences)
# sentence encoder
sentence_input = Input(shape=(SENT_LEN,))
embeddings = Embedding(input_dim=INP_DIM, output_dim=EMBD_DIM,
input_length=SENT_LEN)(sentence_input)
sentence_encoded = Bidirectional(LSTM(100, return_sequences=True))(embeddings)
sentence_attention = AttLayer()(sentence_encoded)
sentence_encoder_model = Model(inputs=sentence_input,
outputs=sentence_attention)
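
    # Shape sketch (inferred from the constants above, not asserted in the
    # original script): the sentence encoder maps a (batch, SENT_LEN) row of
    # word indices to a (batch, 200) vector, since the bidirectional LSTM
    # concatenates two 100-unit directions before attention sums over time.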

    # Document encoder: sentence vectors -> bidirectional LSTM -> attention,
    # followed by a small fully connected classifier head.
    document_input = Input(shape=(NUM_SENT, SENT_LEN))
    sentences_encoded = TimeDistributed(sentence_encoder_model)(document_input)
    document_encoded = Bidirectional(LSTM(100, return_sequences=True))(sentences_encoded)
    document_attention = AttLayer()(document_encoded)
    fully_connected = Dense(32, activation="relu")(document_attention)
    outputs_ = Dense(1, activation="sigmoid")(fully_connected)

    hierarchical_model = Model(inputs=document_input, outputs=outputs_)
    hierarchical_model.summary()
    hierarchical_model.compile(loss="binary_crossentropy", optimizer="adam",
                               metrics=["accuracy"])

    reviews, labels = load_preprocessed_data(sent_len=SENT_LEN,
                                             num_sent=NUM_SENT)
    hierarchical_model.fit(x=reviews, y=labels, epochs=3, validation_split=0.2,
                           batch_size=32)
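
    # The results below compare GRU and LSTM recurrent units. A minimal
    # sketch of the GRU variant (an assumption: only the recurrent layers
    # change), e.g. for the sentence encoder:
    #     from keras.layers import GRU
    #     sentence_encoded = Bidirectional(GRU(100, return_sequences=True))(embeddings)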
    # Adam + GRU(100) + AttLayer, fed (20, 50) input. Our best config so far:
    # Epoch 2/3
    # 20000/20000 [==============================] - 1258s - loss: 0.1870 -
    # acc: 0.9296 - val_loss: 0.2757 - val_acc: 0.8952
    # Epoch 3/3
    # 20000/20000 [==============================] - 1254s - loss: 0.0919 -
    # acc: 0.9688 - val_loss: 0.3359 - val_acc: 0.8850
    #
    # The same config as above, but with LSTM units this time:
    # Epoch 2/3
    # 20000/20000 [==============================] - 1473s - loss: 0.2041 -
    # acc: 0.9229 - val_loss: 0.2908 - val_acc: 0.8854
    # Epoch 3/3
    # 20000/20000 [==============================] - 1370s - loss: 0.1244 -
    # acc: 0.9543 - val_loss: 0.3134 - val_acc: 0.8790
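
    # Optional usage sketch (not in the original script): once trained, the
    # model scores new preprocessed documents via the standard Keras API,
    # returning sigmoid probabilities in [0, 1]:
    #     probs = hierarchical_model.predict(reviews[:5])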