@@ -231,4 +231,48 @@ TEST(OverfitTest, CrossEntropyRMSPropOverfitsSmallDataset_OptimizedLoss) {
   EXPECT_LT((*finalLoss)[0], 0.05f)
       << "Network failed to overfit multiclass dataset"
       << "\nFinal prediction: " << softmax(*pred) << "\nFinal loss: " << *finalLoss;
+}
+
+TEST(OptimizerTest, ZeroGrad_ClearsAllGradients) {
+  auto x = TensorFunctions::makeSharedTensor(
+      {4, 2}, {0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0}, false);
+  auto y = TensorFunctions::makeSharedTensor(
+      {4, 1}, {0.0, 1.0, 1.0, 0.0}, false);
+
+  auto net = makeBinaryNet();
+  auto loss = std::make_shared<train::BceSigmoidLoss>();
+  auto optim = std::make_shared<train::SgdOptimizer>(
+      net->parameters(), 0.01f);
+
+  // one forward/backward pass to populate gradients
+  auto pred = (*net)(x);
+  auto l = (*loss)(y, pred);
+  l->backward();
+
+  // verify gradients are non-zero before zeroing
+  bool anyNonZero = false;
+  for (auto& p : net->parameters()) {
+    if (p->getGrads()) {
+      for (tensorSize_t i = 0; i < p->getGrads()->getSize(); i++) {
+        if ((*p->getGrads())[i] != 0.0f) {
+          anyNonZero = true;
+          break;
+        }
+      }
+    }
+  }
+  EXPECT_TRUE(anyNonZero) << "Expected some non-zero gradients before zeroGrad";
+
+  // zero gradients
+  optim->zeroGrad();
+
+  // verify all gradients are zero after zeroing
+  for (auto& p : net->parameters()) {
+    if (p->getGrads()) {
+      for (tensorSize_t i = 0; i < p->getGrads()->getSize(); i++) {
+        EXPECT_FLOAT_EQ((*p->getGrads())[i], 0.0f)
+            << "Gradient not zeroed at index " << i;
+      }
+    }
+  }
 }
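
For context, here is a minimal sketch of what `zeroGrad()` could look like on the optimizer side, under the assumptions this test already makes: the optimizer retains the parameter list it was constructed with (called `params_` below, a hypothetical member name) and `getGrads()` returns a mutable gradient tensor that may be null before the first `backward()`. This is an illustration of the behavior the test asserts, not the repository's actual implementation.

```cpp
// Hypothetical sketch, not the repository's actual code: params_ is an
// assumed member holding the parameter list passed at construction.
void SgdOptimizer::zeroGrad() {
  for (auto& p : params_) {
    auto grads = p->getGrads();   // may be null before the first backward()
    if (grads) {
      for (tensorSize_t i = 0; i < grads->getSize(); i++) {
        (*grads)[i] = 0.0f;       // reset each accumulated gradient in place
      }
    }
  }
}
```

Note that because the verification loop above skips parameters whose `getGrads()` is null, the test would also pass if `zeroGrad()` released the gradient buffers entirely; the in-place reset shown here is just one plausible design.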