@@ -231,4 +231,48 @@ TEST(OverfitTest, CrossEntropyRMSPropOverfitsSmallDataset_OptimizedLoss) {
   EXPECT_LT((*finalLoss)[0], 0.05f)
       << "Network failed to overfit multiclass dataset"
       << "\nFinal prediction: " << softmax(*pred) << "\nFinal loss: " << *finalLoss;
+}
+
+TEST(OptimizerTest, ZeroGrad_ClearsAllGradients) {
+  auto x = TensorFunctions::makeSharedTensor(
+      {4, 2}, {0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0}, false);
+  auto y = TensorFunctions::makeSharedTensor(
+      {4, 1}, {0.0, 1.0, 1.0, 0.0}, false);
+
+  auto net = makeBinaryNet();
+  auto loss = std::make_shared<train::BceSigmoidLoss>();
+  auto optim = std::make_shared<train::SgdOptimizer>(
+      net->parameters(), 0.01f);
+
+  // one forward/backward pass to populate gradients
+  auto pred = (*net)(x);
+  auto l = (*loss)(y, pred);
+  l->backward();
+
+  // verify gradients are non-zero before zeroing
+  bool anyNonZero = false;
+  for (auto& p : net->parameters()) {
+    if (p->getGrads()) {
+      for (tensorSize_t i = 0; i < p->getGrads()->getSize(); i++) {
+        if ((*p->getGrads())[i] != 0.0f) {
+          anyNonZero = true;
+          break;
+        }
+      }
+    }
+  }
+  EXPECT_TRUE(anyNonZero) << "Expected some non-zero gradients before zeroGrad";
+
+  // zero gradients
+  optim->zeroGrad();
+
+  // verify all gradients are zero after zeroing
+  for (auto& p : net->parameters()) {
+    if (p->getGrads()) {
+      for (tensorSize_t i = 0; i < p->getGrads()->getSize(); i++) {
+        EXPECT_FLOAT_EQ((*p->getGrads())[i], 0.0f)
+            << "Gradient not zeroed at index " << i;
+      }
+    }
+  }
 }
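
For context, here is a minimal sketch of what `zeroGrad()` could look like on the optimizer side, under the assumptions this test already makes: the optimizer retains the parameter list it was constructed with (called `params_` below, a hypothetical member name) and `getGrads()` returns a mutable gradient tensor that may be null before the first `backward()`. This is an illustration of the behavior the test asserts, not the repository's actual implementation.

```cpp
// Hypothetical sketch, not the repository's actual code: params_ is an
// assumed member holding the parameter list passed at construction.
void SgdOptimizer::zeroGrad() {
  for (auto& p : params_) {
    auto grads = p->getGrads();   // may be null before the first backward()
    if (grads) {
      for (tensorSize_t i = 0; i < grads->getSize(); i++) {
        (*grads)[i] = 0.0f;       // reset each accumulated gradient in place
      }
    }
  }
}
```

Note that because the verification loop above skips parameters whose `getGrads()` is null, the test would also pass if `zeroGrad()` released the gradient buffers entirely; the in-place reset shown here is just one plausible design.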