9 changes: 7 additions & 2 deletions glove/glove.py
@@ -43,6 +43,8 @@ def __init__(self, no_components=30, learning_rate=0.05,

        self.dictionary = None
        self.inverse_dictionary = None

        self.global_loss = None

    def fit(self, matrix, epochs=5, no_threads=2, verbose=False):
        """
@@ -86,8 +88,8 @@ def fit(self, matrix, epochs=5, no_threads=2, verbose=False):

            # Shuffle the co-occurrence matrix
            np.random.shuffle(shuffle_indices)

            fit_vectors(self.word_vectors,
            self.global_loss = fit_vectors(self.word_vectors,
                        self.vectors_sum_gradients,
Owner commented:
Could you just fix the indentation of these lines? self.vectors_sum_gradients should be indented to the same level as self.word_vectors.
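(An editorial sketch of the alignment being requested; the stand-in function below is hypothetical and only illustrates the continuation-indentation style, not this repo's code:)

```python
# Hypothetical stand-in: with the new "self.global_loss = " prefix, the
# continuation arguments should line up under the first argument of the call.
def fit_vectors(word_vectors, vectors_sum_gradients, word_biases):
    return 0.0  # placeholder return value

word_vectors, vectors_sum_gradients, word_biases = [], [], []

global_loss = fit_vectors(word_vectors,
                          vectors_sum_gradients,
                          word_biases)
```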

                        self.word_biases,
                        self.biases_sum_gradients,
@@ -99,6 +101,9 @@ def fit(self, matrix, epochs=5, no_threads=2, verbose=False):
                        self.max_count,
                        self.alpha,
                        int(no_threads))

            if verbose:
                print('Global loss: %f' % self.global_loss)

            if not np.isfinite(self.word_vectors).all():
                raise Exception('Non-finite values in word vectors. '
9 changes: 8 additions & 1 deletion glove/glove_cython.pyx
@@ -50,6 +50,9 @@ def fit_vectors(double[:, ::1] wordvec,

    # Loss and gradient variables.
    cdef double prediction, entry_weight, loss

    # Running global loss, accumulated over all examples.
    cdef double global_loss = 0.0

    # Iteration variables
    cdef int i, j, shuffle_index
@@ -75,6 +78,9 @@
            # Compute loss and the example weight.
            entry_weight = double_min(1.0, (count / max_count)) ** alpha
            loss = entry_weight * (prediction - c_log(count))

            # Update the weighted global loss
            global_loss += 0.5 * entry_weight * (prediction - c_log(count)) ** 2
Review comment:
Why not factor out the log call? Is this relying on a compiler optimization?

Also, and I'm probably missing something obvious, but why the `0.5 *`?
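(An editorial sketch of the factoring being asked about, as a pure-Python stand-in rather than the repo's Cython; names mirror the surrounding diff:)

```python
from math import log

def loss_terms(prediction, count, max_count, alpha):
    # Compute the log residual once and reuse it for both the
    # gradient term and the running global loss contribution.
    entry_weight = min(1.0, count / max_count) ** alpha
    residual = prediction - log(count)             # single log call
    loss = entry_weight * residual                 # drives the SGD updates
    global_loss_term = 0.5 * entry_weight * residual ** 2
    return loss, global_loss_term
```

One plausible reading of the `0.5 *` (an editorial guess, not confirmed in the thread): if the tracked objective is `0.5 * entry_weight * residual ** 2`, its derivative with respect to the residual is `entry_weight * residual`, which is exactly the `loss` term the update step applies, so the factor keeps the reported loss consistent with the gradients being used.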


            # Update step: apply gradients and reproject
            # onto the unit sphere.
@@ -100,7 +106,8 @@ def fit_vectors(double[:, ::1] wordvec,
            learning_rate = initial_learning_rate / sqrt(wordbias_sum_gradients[word_b])
            wordbias[word_b] -= learning_rate * loss
            wordbias_sum_gradients[word_b] += loss ** 2


    return global_loss
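(Editorial aside on the return value: assuming the surrounding loop uses cython.parallel.prange, as the `no_threads` argument suggests, a variable updated only with an in-place operator inside prange becomes a per-thread reduction, so the returned global_loss is the combined sum across threads. A minimal pure-Python-mode sketch, runnable uncompiled because prange then degrades to a plain range:)

```python
from cython.parallel import prange

def summed_loss(losses, no_threads=2):
    total = 0.0
    # In compiled Cython (with typed data and nogil), `total` would be
    # treated as a reduction variable: each thread accumulates privately
    # and the partial sums are combined after the loop.
    for i in prange(len(losses), num_threads=no_threads):
        total += losses[i]
    return total

print(summed_loss([0.5, 1.25, 2.0]))  # 3.75
```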

def transform_paragraph(double[:, ::1] wordvec,
                        double[::1] wordbias,
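(For context, a hypothetical end-to-end sketch of the new verbose loss reporting, assuming the Corpus/Glove API from this repository; the toy sentences are made up:)

```python
from glove import Corpus, Glove

sentences = [['human', 'interface', 'computer'],
             ['survey', 'user', 'computer', 'system', 'response', 'time']]

# Build the co-occurrence matrix from the toy corpus.
corpus = Corpus()
corpus.fit(sentences, window=10)

# With this change, each training epoch prints the accumulated global loss.
glove = Glove(no_components=30, learning_rate=0.05)
glove.fit(corpus.matrix, epochs=5, no_threads=2, verbose=True)
```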