Commit e121618

traverse the code, fixed bugs
Now, the model initialization part is working
1 parent 0aa3aaa commit e121618

14 files changed: 197 additions & 106 deletions

main/src/main/python/embeddings/wordEmbeddingMap.py

Lines changed: 23 additions & 23 deletions
@@ -1,29 +1,29 @@
 import numpy as np
+import math
 
 class WordEmbeddingMap:
     def __init__(self, config):
-        self.emb_dict = self.load(config)
-        self.dim = self.emb_dict.shape[-1]
-
-    def load(self):
-        emb_matrix = None
-        emb_dict = dict()
-        for line in open(config.get_string("glove.matrixResourceName")):
-            if not len(line.split()) == 2:
-                if "\t" in line:
-                    delimiter = "\t"
-                else:
-                    delimiter = " "
-                line_split = line.rstrip().split(delimiter)
-                # extract word and vector
-                word = line_split[0]
-                x = np.array([float(i) for i in line_split[1:]])
-                vector = (x / np.linalg.norm(x))
-                embedding_size = vector.shape[0]
-                emb_dict[word] = vector
-        base = math.sqrt(6 / embedding_size)
-        emb_dict["<UNK>"] = np.random.uniform(-base, base, (embedding_size))
-        return emb_dict
+        self.emb_dict, self.dim = load(config)
 
     def isOutOfVocabulary(self, word):
-      return word not in self.emb_dict
+        return word not in self.emb_dict
+
+def load(config):
+    emb_matrix = None
+    emb_dict = dict()
+    for line in open(config.get_string("glove.matrixResourceName")):
+        if not len(line.split()) == 2:
+            if "\t" in line:
+                delimiter = "\t"
+            else:
+                delimiter = " "
+            line_split = line.rstrip().split(delimiter)
+            # extract word and vector
+            word = line_split[0]
+            x = np.array([float(i) for i in line_split[1:]])
+            vector = (x / np.linalg.norm(x))
+            embedding_size = vector.shape[0]
+            emb_dict[word] = vector
+    base = math.sqrt(6 / embedding_size)
+    emb_dict["<UNK>"] = np.random.uniform(-base, base, (embedding_size))
+    return emb_dict, embedding_size
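
The rewritten load() unit-normalizes every GloVe row and backs off to a single random <UNK> vector drawn from a Xavier-style uniform range of ±sqrt(6/dim). A minimal standalone sketch of the same scheme; the two-word vocabulary and 4-dimensional vectors are made up for illustration:

import math
import numpy as np

toy_rows = {"the": np.ones(4), "cat": np.arange(1.0, 5.0)}          # stand-in for the GloVe file
emb_dict = {w: v / np.linalg.norm(v) for w, v in toy_rows.items()}  # unit-normalize each vector
embedding_size = 4
base = math.sqrt(6 / embedding_size)                                # Xavier-style range
emb_dict["<UNK>"] = np.random.uniform(-base, base, embedding_size)

def lookup(word):
    # Out-of-vocabulary words all share the one random <UNK> row.
    return emb_dict.get(word, emb_dict["<UNK>"])

print(lookup("dog"))  # falls back to <UNK>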

main/src/main/python/pytorch/constEmbeddingsGlove.py

Lines changed: 6 additions & 2 deletions
@@ -1,16 +1,18 @@
 from dataclasses import dataclass
 import torch.nn as nn
 from embeddings.wordEmbeddingMap import *
+from pyhocon import ConfigFactory
 
 @dataclass
 class ConstEmbeddingParameters:
     emb: nn.Embedding
     w2i: dict
 
-def ConstEmbeddingsGlove:
+class _ConstEmbeddingsGlove:
     def __init__(self):
         self.SINGLETON_WORD_EMBEDDING_MAP = None
-        self.load('../resources/org/clulab/glove.conf')
+        config = ConfigFactory.parse_file('../resources/org/clulab/glove.conf')
+        self.load(config)
         self.dim = self.SINGLETON_WORD_EMBEDDING_MAP.dim
 
     def load(self, config):
@@ -25,3 +27,5 @@ def mkConstLookupParams(self, words):
         emd = nn.Embedding.from_pretrained(weight)
         emd.weight.requires_grad=False
         return ConstEmbeddingParameters(emb ,w2i)
+
+ConstEmbeddingsGlove = _ConstEmbeddingsGlove()
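
The def-to-class rename plus the assignment on the last added line is the module-level singleton idiom: the expensive GloVe load runs once, at first import, and callers now write ConstEmbeddingsGlove.dim instead of ConstEmbeddingsGlove().dim. A minimal sketch of the pattern; the module and class names here are illustrative:

# glove_singleton.py
class _GloveSingleton:
    def __init__(self):
        # Pretend this is the slow embeddings load; it runs exactly once,
        # the first time any module imports glove_singleton.
        self.dim = 300

GloveSingleton = _GloveSingleton()  # the shared instance hides the _-prefixed class

# elsewhere:
#   from glove_singleton import GloveSingleton
#   GloveSingleton.dim   # plain attribute access, no ()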

main/src/main/python/pytorch/embeddingLayer.py

Lines changed: 14 additions & 11 deletions
@@ -1,8 +1,9 @@
-from initialLayer import InitialLayer
+from pytorch.initialLayer import InitialLayer
 import random
-from utils import *
+from pytorch.utils import *
 import torch.nn as nn
 import torch
+from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove
 
 DEFAULT_DROPOUT_PROB: float = DEFAULT_DROPOUT_PROBABILITY
 DEFAULT_LEARNED_WORD_EMBEDDING_SIZE: int = 128
@@ -16,7 +17,7 @@
 DEFAULT_USE_IS_PREDICATE: int = -1
 
 class EmbeddingLayer(InitialLayer):
-    def __init__(w2i, # word to index
+    def __init__(self, w2i, # word to index
                  w2f, # word to frequency
                  c2i, # character to index
                  tag2i, # POS tag to index
@@ -68,7 +69,7 @@ def __init__(w2i, # word to index
         positionDim = 1 if distanceLookupParameters and useIsPredicate else 0
         predicateDim = positionEmbeddingSize if positionLookupParameters else 0
 
-        self.outDim = ConstEmbeddingsGlove().dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim
+        self.outDim = ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim
         random.seed(RANDOM_SEED)
 
     def forward(self, sentence, constEmbeddings, doDropout):
@@ -81,9 +82,9 @@ def forward(self, sentence, constEmbeddings, doDropout):
         # const word embeddings such as GloVe
         constEmbeddingsExpressions = self.mkConstEmbeddings(words, constEmbeddings)
         assert(constEmbeddingsExpressions.size(0) == len(words))
-        if(tags) assert(len(tags) == len(words))
-        if(nes) assert(len(nes) == len(words))
-        if(headPositions) assert(len(headPositions) == len(words))
+        if(tags): assert(len(tags) == len(words))
+        if(nes): assert(len(nes) == len(words))
+        if(headPositions): assert(len(headPositions) == len(words))
 
         # build the word embeddings one by one
         embeddings = self.mkEmbeddings(words, constEmbeddingsExpressions, tags, nes, headPositions)
@@ -250,7 +251,7 @@ def load(cls, x2i):
     @classmethod
     def initialize(cls, config, paramPrefix, wordCounter):
 
-        if(not config.__contains__(paramPrefix)):
+        if(not config.contains(paramPrefix)):
            return None
 
        learnedWordEmbeddingSize = config.get_int(paramPrefix + ".learnedWordEmbeddingSize",DEFAULT_LEARNED_WORD_EMBEDDING_SIZE)
@@ -260,9 +261,9 @@ def initialize(cls, config, paramPrefix, wordCounter):
        neTagEmbeddingSize = config.get_int(paramPrefix + ".neTagEmbeddingSize",DEFAULT_NE_TAG_EMBEDDING_SIZE)
        distanceEmbeddingSize = config.get_int(paramPrefix + ".distanceEmbeddingSize",DEFAULT_DISTANCE_EMBEDDING_SIZE)
        distanceWindowSize = config.get_int(paramPrefix + ".distanceWindowSize",DEFAULT_DISTANCE_WINDOW_SIZE)
-       useIsPredicate = config.getArgBoolean(paramPrefix + ".useIsPredicate",DEFAULT_USE_IS_PREDICATE == 1)
+       useIsPredicate = config.get_bool(paramPrefix + ".useIsPredicate",DEFAULT_USE_IS_PREDICATE == 1)
        positionEmbeddingSize = config.get_int(paramPrefix + ".positionEmbeddingSize",DEFAULT_POSITION_EMBEDDING_SIZE)
-       dropoutProb = config.get_float(paramPrefix + ".dropoutProb",EmbeddingLayer.DEFAULT_DROPOUT_PROB)
+       dropoutProb = config.get_float(paramPrefix + ".dropoutProb",DEFAULT_DROPOUT_PROB)
 
        wordList = [UNK_WORD] + sorted(wordCounter.keys())
        w2i = {w:i for i, w in enumerate(wordList)}
@@ -293,7 +294,7 @@ def initialize(cls, config, paramPrefix, wordCounter):
        distanceLookupParameters = nn.Embedding(distanceWindowSize * 2 + 3, distanceEmbeddingSize) if distanceEmbeddingSize > 0 else None
        positionLookupParameters = nn.Embedding(101, positionEmbeddingSize) if positionEmbeddingSize > 0 else None
 
-       return cls(w2i, w2f, c2i, tag2i, ne2i,
+       return cls(w2i, wordCounter, c2i, tag2i, ne2i,
            learnedWordEmbeddingSize,
            charEmbeddingSize,
            charRnnStateSize,
@@ -331,6 +332,8 @@ def initialize(cls, config, paramPrefix, wordCounter):
 
 
 
+
+
 
 
 
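
The initialize() fixes replace leftovers from the Scala original (getArgBoolean, __contains__, and a stale EmbeddingLayer. qualifier on a module-level constant) with the pyhocon-style getters used throughout this port, each of which takes a default for missing keys. A sketch of that access pattern, assuming pyhocon and an illustrative HOCON snippet:

from pyhocon import ConfigFactory

config = ConfigFactory.parse_string("""
mtl.layer1 {
  learnedWordEmbeddingSize = 128
  useIsPredicate = true
}
""")

prefix = "mtl.layer1"
# Each getter returns the supplied default when the key is absent.
size = config.get_int(prefix + ".learnedWordEmbeddingSize", 128)
usePred = config.get_bool(prefix + ".useIsPredicate", False)
dropout = config.get_float(prefix + ".dropoutProb", 0.2)  # key missing -> 0.2
print(size, usePred, dropout)  # 128 True 0.2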

main/src/main/python/pytorch/forwardLayer.py

Lines changed: 8 additions & 5 deletions
@@ -3,14 +3,13 @@
 from torch.autograd import Variable
 import torch.nn.functional as F
 
-from finalLayer import FinalLayer
-from greedyForwardLayer import GreedyForwardLayer
-from viterbiForwardLayer import ViterbiForwardLayer
+from pytorch.finalLayer import FinalLayer
 
-from utils import *
+from pytorch.utils import *
 
 class ForwardLayer(FinalLayer):
     def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None):
+        super().__init__()
         self.inputSize = inputSize
         self.isDual = isDual
         self.t2i = t2i
@@ -73,6 +72,8 @@ def forward(inputExpressions, doDropout, headPositionsOpt = None):
 
     @staticmethod
     def load(x2i):
+        from pytorch.greedyForwardLayer import GreedyForwardLayer
+        from pytorch.viterbiForwardLayer import ViterbiForwardLayer
         inferenceType = x2i["inferenceType"]
         if inferenceType == TYPE_VITERBI:
             pass
@@ -85,7 +86,9 @@ def load(x2i):
 
     @staticmethod
     def initialize(config, paramPrefix, labelCounter, isDual, inputSize):
-        if(not config.__contains__(paramPrefix)):
+        from pytorch.greedyForwardLayer import GreedyForwardLayer
+        from pytorch.viterbiForwardLayer import ViterbiForwardLayer
+        if(not config.contains(paramPrefix)):
            return None
 
        inferenceType = config.get_string(paramPrefix + ".inference", "greedy")
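
Moving the GreedyForwardLayer/ViterbiForwardLayer imports from the top of forwardLayer.py into load() and initialize() breaks an import cycle: both subclasses import ForwardLayer at module load, so neither side can import the other at the top level. A minimal two-file sketch of the deferred-import idiom; the file and class names are illustrative:

# shape.py
class Shape:
    @staticmethod
    def make(kind):
        # Deferred import: circle.py imports shape.py when it loads, so a
        # top-level "from circle import Circle" here would be circular.
        from circle import Circle
        return Circle() if kind == "circle" else Shape()

# circle.py
#   from shape import Shape
#   class Circle(Shape):
#       pass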

main/src/main/python/pytorch/greedyForwardLayer.py

Lines changed: 3 additions & 3 deletions
@@ -1,5 +1,5 @@
-from forwardLayer import *
-from utils import *
+from pytorch.forwardLayer import *
+from pytorch.utils import *
 import numpy as np
 
 class GreedyForwardLayer(ForwardLayer):
@@ -23,7 +23,7 @@ def saveX2i(self):
         return x2i
 
     def __str__(self):
-        return f"GreedyForwardLayer({inDim}, {outDim})"
+        return f"GreedyForwardLayer({self.inDim}, {self.outDim})"
 
     def inference(self, emissionScores):
         labelIds = np.argmax(lattice.data.numpy(), axis=1).tolist()
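
For context, GreedyForwardLayer.inference() decodes each token independently with an argmax over its emission scores; unlike Viterbi, no transition scores constrain adjacent labels. (The unchanged context line above still reads lattice where the parameter is named emissionScores.) A toy sketch of greedy decoding with made-up scores and an illustrative tagset:

import numpy as np

i2t = {0: "O", 1: "B-PER", 2: "I-PER"}           # illustrative index-to-tag map
emissionScores = np.array([[2.0, 0.1, 0.3],       # one row per token,
                           [0.2, 1.5, 0.9],       # one column per label
                           [0.1, 0.4, 2.2]])

labelIds = np.argmax(emissionScores, axis=1).tolist()  # best label per token
print([i2t[i] for i in labelIds])                      # ['O', 'B-PER', 'I-PER']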

main/src/main/python/pytorch/layers.py

Lines changed: 30 additions & 19 deletions
@@ -1,7 +1,9 @@
 import torch.nn as nn
-from utils import *
-from embeddingLayer import EmbeddingLayer
-from constEmbeddingsGlove import ConstEmbeddingsGlove
+from pytorch.utils import *
+from pytorch.embeddingLayer import EmbeddingLayer
+from pytorch.rnnLayer import RnnLayer
+from pytorch.forwardLayer import ForwardLayer
+from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove
 
 class Layers(object):
     def __init__(self, initialLayer, intermediateLayers, finalLayer):
@@ -14,8 +16,7 @@ def __init__(self, initialLayer, intermediateLayers, finalLayer):
         else:
             self.outDim = None
 
-        if initialLayer and intermediateLayers and finalLayer:
-            self.nonEmpty = True
+        self.nonEmpty = initialLayer is not None and intermediateLayers is not None and finalLayer is not None
         self.isEmpty = not self.nonEmpty
 
         self.initialLayer = initialLayer
@@ -25,43 +26,53 @@ def __init__(self, initialLayer, intermediateLayers, finalLayer):
     def __str__(self):
         s = ""
         started = False
-        if(initialLayer.nonEmpty):
-            s += "initial = " + initialLayer
+        if(self.initialLayer is not None):
+            s += "initial = " + str(self.initialLayer)
             started = True
-        for i in intermediateLayers.indices:
-            if(started) s += " "
-            s += s"intermediate ({i+1}) = " + intermediateLayers[i]
+        for i in range(len(self.intermediateLayers)):
+            if(started): s += " "
+            s += f"intermediate ({i+1}) = " + str(self.intermediateLayers[i])
             started = True
-        if(finalLayer.nonEmpty):
-            if(started) s += " "
-            s += "final = " + finalLayer
+        if(self.finalLayer is not None):
+            if(started): s += " "
+            s += "final = " + str(self.finalLayer)
         return s
 
+    def get_parameters(self):
+        parameters = list()
+        if self.initialLayer is not None:
+            parameters += [p for p in self.initialLayer.parameters() if p.requires_grad]
+        for il in self.intermediateLayers:
+            parameters += [p for p in il.parameters() if p.requires_grad]
+        if self.finalLayer is not None:
+            parameters += [p for p in self.finalLayer.parameters() if p.requires_grad]
+        return parameters
+
     def forward(self, sentence, constEmbeddings, doDropout):
         if self.initialLayer.isEmpty:
             raise RuntimeError(f"ERROR: you can't call forward() on a Layers object that does not have an initial layer: {self}!")
         states = self.initialLayer(sentence, constEmbeddings, doDropout)
         for intermediateLayer in self.intermediateLayers:
             states = intermediateLayer(states, doDropout)
-        if self.finalLayer.nonEmpty:
+        if self.finalLayer is not None:
            states = self.finalLayer(states, sentence.headPositions, doDropout)
 
        return states
 
     def forwardFrom(self, inStates, headPositions, doDropout):
-        if self.initialLayer.nonEmpty:
+        if self.initialLayer is not None:
            raise RuntimeError(f"ERROR: you can't call forwardFrom() on a Layers object that has an initial layer: {self}")
        states = inStates
        for intermediateLayer in self.intermediateLayers:
            states = intermediateLayer(states, doDropout)
-       if self.finalLayer.nonEmpty:
+       if self.finalLayer is not None:
           states = self.finalLayer(states, sentence.headPositions, doDropout)
 
       return states
 
     def saveX2i(self):
        x2i = dict()
-       if self.initialLayer.nonEmpty:
+       if self.initialLayer is not None:
           x2i['hasInitial'] = 1
           x2i['initialLayer'] = self.initialLayer.saveX2i()
       else:
@@ -70,7 +81,7 @@ def saveX2i(self):
        x2i['intermediateLayers'] = list()
        for il in self.intermediateLayers:
            x2i['intermediateLayers'].append(il.saveX2i())
-       if self.finalLayer.nonEmpty:
+       if self.finalLayer is not None:
           x2i['hasFinal'] = 1
           x2i['finalLayer'] = self.finalLayer.saveX2i()
       else:
@@ -227,7 +238,7 @@ def parse(layers, sentence, constEmbeddings):
     @staticmethod
     def loss(layers, taskId, sentence, goldLabels):
         # Zheng: I am not sure this is the suitable way to load embeddings or not, need help...
-        constEmbeddings = ConstEmbeddingsGlove().mkConstLookupParams(sentence.words)
+        constEmbeddings = ConstEmbeddingsGlove.mkConstLookupParams(sentence.words)
         states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=True) # use dropout during training!
         return layers[taskId+1].finalLayer.loss(states, goldLabels)
