From 1e12d2e0206706a8e751203f2de6307880c0ba8c Mon Sep 17 00:00:00 2001 From: luoyue light Date: Sun, 8 Mar 2026 15:35:33 +0800 Subject: [PATCH 1/2] feat: expand test infrastructure --- .gitmodules | 6 +- CMakeLists.txt | 35 +- TEST_REPORT.md | 83 +++++ tests/CMakeLists.txt | 20 ++ tests/autograd/CMakeLists.txt | 27 ++ tests/autograd/test_autograd.cc | 537 +++++++++++++++++++++++++++++ tests/common/CMakeLists.txt | 4 + tests/common/test_utils.h | 164 +++++++++ tests/hook/CMakeLists.txt | 47 +++ tests/hook/test_hook.cc | 196 +++++++++++ tests/hook/test_precision_check.cc | 76 ++++ tests/optimizer/CMakeLists.txt | 27 ++ tests/optimizer/test_optimizer.cc | 172 +++++++++ tests/slow/CMakeLists.txt | 27 ++ tests/slow/test_slow.cc | 28 ++ tests/tensor/CMakeLists.txt | 31 ++ tests/tensor/test_tensor.cc | 172 +++++++++ 17 files changed, 1637 insertions(+), 15 deletions(-) create mode 100644 TEST_REPORT.md create mode 100644 tests/CMakeLists.txt create mode 100644 tests/autograd/CMakeLists.txt create mode 100644 tests/autograd/test_autograd.cc create mode 100644 tests/common/CMakeLists.txt create mode 100644 tests/common/test_utils.h create mode 100644 tests/hook/CMakeLists.txt create mode 100644 tests/hook/test_hook.cc create mode 100644 tests/hook/test_precision_check.cc create mode 100644 tests/optimizer/CMakeLists.txt create mode 100644 tests/optimizer/test_optimizer.cc create mode 100644 tests/slow/CMakeLists.txt create mode 100644 tests/slow/test_slow.cc create mode 100644 tests/tensor/CMakeLists.txt create mode 100644 tests/tensor/test_tensor.cc diff --git a/.gitmodules b/.gitmodules index 470cf466..64299a70 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,9 @@ [submodule "third_party/glog"] path = third_party/glog - url = git@github.com:google/glog.git + url = https://github.com/google/glog.git [submodule "third_party/gflags"] path = third_party/gflags - url = git@github.com:gflags/gflags.git + url = https://github.com/gflags/gflags.git [submodule "third_party/eigen"] path = third_party/eigen - url = git@github.com:InfiniTensor/eigen-mirror.git + url = https://github.com/eigenteam/eigen-git-mirror.git diff --git a/CMakeLists.txt b/CMakeLists.txt index df636b27..84056f76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,7 @@ option(USE_CUDA "Support NVIDIA CUDA" OFF) option(PROFILE_MODE "ENABLE PROFILE MODE" OFF) option(USE_OMP "Use OpenMP as backend for Eigen" ON) option(USE_NCCL "Build project for distributed running" ON) +option(BUILD_TEST "Build InfiniTrain tests" ON) project(infini_train VERSION 0.5.0 LANGUAGES CXX) @@ -14,6 +15,21 @@ set(CMAKE_CXX_EXTENSIONS OFF) # Generate compile_commands.json set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +# ------------------------------------------------------------------------------ +# GoogleTest (FetchContent) +# ------------------------------------------------------------------------------ +if(BUILD_TEST) + include(FetchContent) + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v1.14.0 + ) + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + FetchContent_MakeAvailable(googletest) + enable_testing() +endif() + # ------------------------------------------------------------------------------ # Third-party deps # ------------------------------------------------------------------------------ @@ -26,7 +42,9 @@ include_directories(${gflags_SOURCE_DIR}/include) set(WITH_GFLAGS OFF CACHE BOOL "Disable glog finding system gflags" FORCE) set(WITH_GTEST OFF CACHE BOOL "Disable glog finding system gtest" FORCE) add_subdirectory(third_party/glog) +add_compile_definitions(GLOG_USE_GLOG_EXPORT=1) include_directories(${glog_SOURCE_DIR}/src) +include_directories(${glog_BINARY_DIR}/glog) # eigen if(USE_OMP) @@ -48,6 +66,8 @@ endif() # Framework core sources (*.cc), excluding cpu kernels (they are built separately) file(GLOB_RECURSE SRC ${PROJECT_SOURCE_DIR}/infini_train/src/*.cc) list(FILTER SRC EXCLUDE REGEX ".*kernels/cpu/.*") +list(FILTER SRC EXCLUDE REGEX ".*runtime/cuda/.*") +list(FILTER SRC EXCLUDE REGEX ".*ccl/cuda/.*") if(NOT USE_NCCL) list(FILTER SRC EXCLUDE REGEX ".*infini_train/src/core/ccl/cuda/.*") endif() @@ -190,17 +210,8 @@ add_executable(llama3 ) link_infini_train_exe(llama3) -# Tools -add_subdirectory(tools/infini_run) -set_target_properties(infini_run PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) # Tests -add_executable(test_hook test/hook/test_hook.cc) -link_infini_train_exe(test_hook) - -add_executable(test_precision_check test/hook/test_precision_check.cc) -link_infini_train_exe(test_precision_check) - -add_executable(test_lora test/lora/test_lora.cc) -link_infini_train_exe(test_lora) - +if(BUILD_TEST) + add_subdirectory(tests) +endif() diff --git a/TEST_REPORT.md b/TEST_REPORT.md new file mode 100644 index 00000000..2e7fda11 --- /dev/null +++ b/TEST_REPORT.md @@ -0,0 +1,83 @@ +# InfiniTrain 测试体系报告 + +## 1. 概述 +- 为主仓库提供了可扩展的 CTest + gtest 弹性测试骨架。 +- `BUILD_TEST` 开关保持默认启用,允许在关闭时跳过测试、在打开时统一构建所有 test 目标。 + +## 2. 架构与工程化 + +| 组件 | 说明 | +| --- | --- | +| CMake | 顶层 `CMakeLists.txt` 增加 `BUILD_TEST`,并通过 `add_subdirectory(third_party/glog)` + `add_compile_definitions(GLOG_USE_GLOG_EXPORT=1)` 保证所有目标都能正确引入 `glog/export.h`。`include_directories` 同时将 `glog` 的源目录和生成目录都纳入搜索路径。 | +| 二层分类 | 所有测试通过 `set_tests_properties(... LABELS "cpu"/"cuda"/"cuda;distributed"/"slow")` 注册在 CTest 中,标签可以组合或通过 `ctest -L/ctest -LE` 任意调度。 | +| 跳过宏 | `tests/common/test_utils.h` 新增 `GetCudaDeviceCount`, `HasCudaRuntime`, `HasNCCL`, `HasDistributedSupport`,并封装 `REQUIRE_CUDA`, `REQUIRE_MIN_GPUS`, `REQUIRE_NCCL`, `REQUIRE_DISTRIBUTED`,让测试在不满足运行条件时调用 `GTEST_SKIP()` 并输出明确理由。 | + +## 3. 目录与示例 + +``` +tests/ +├── common/ # test_utils.h,定义全局宏、fixture 与 helper +├── tensor/ # tensor_* 目标;cpu/cuda/distributed 测试共享一个 binary +├── optimizer/ # optimizer_* 目标,根据标签调度 +├── autograd/ # autograd_* 目标(CPU + optional CUDA/Distributed) +├── hook/ # hook_* + precision_check +└── slow/ # slow_cpu/cuda/distributed 示例,演示 slow 标签 +``` + +新增的 `tests/slow/test_slow.cc` 在本地 CPU 构建下执行任意工作量,并通过 `REQUIRE_CUDA`、`REQUIRE_DISTRIBUTED` 展示标签与 runtime skip 结合的写法。 + +## 4. 如何新增测试 +1. 在 `tests//` 下添加 `test_.cc`,`TEST` 中可以直接使用 `REQUIRE_` 宏组合运行时能力检查。 +2. `CMakeLists.txt` 中照例添加 executable、链接 gtest、主库 & 内核目标,并用 `add_test` + `set_tests_properties(... LABELS ...)` 绑定适当标签。 +3. `tests/CMakeLists.txt` 统一 `add_subdirectory()`,无须为每个标签写额外逻辑。 + +## 5. 样例运行 +- `cmake -S . -B build -DBUILD_TEST=ON -DUSE_CUDA=OFF -DUSE_NCCL=OFF` +- `cmake --build build` + +### 5.1 ctest -L cpu +``` +Test project /home/luoyue/InfiniTrain/build + Start 1005: tensor_cpu +1/6 Test #1005: tensor_cpu ....................... Passed 0.00 sec + Start 1018: slow_cpu +6/6 Test #1018: slow_cpu ......................... Passed 0.01 sec + +100% tests passed, 0 tests failed out of 6 + +Label Time Summary: +cpu = 0.04 sec*proc (6 tests) +slow = 0.01 sec*proc (1 test) +``` + +### 5.2 ctest -L slow +``` + Start 1018: slow_cpu +1/3 Test #1018: slow_cpu ......................... Passed 0.01 sec + Start 1019: slow_cuda +2/3 Test #1019: slow_cuda ........................ Passed 0.00 sec + Start 1020: slow_distributed +3/3 Test #1020: slow_distributed ................. Passed 0.00 sec + +100% tests passed, 0 tests failed out of 3 +``` + +### 5.3 ctest -L cuda +``` + Start 1006: tensor_cuda +10/10 Test #1020: slow_distributed ................. Passed 0.00 sec + +100% tests passed, 0 tests failed out of 10 +Label Time Summary: +cuda = 0.03 sec*proc (10 tests) +distributed = 0.02 sec*proc (5 tests) +slow = 0.01 sec*proc (2 tests) +``` + +### 5.4 ctest -LE distributed +- 该命令会跳过带 `distributed` 标签的测试(包括 slow_distributed)并运行剩余的 gflags + glog 验证套件。它在大多数构建配置下均能稳定返回(出于 gflags 自身生成的 1,000+ 个子测试中,仅有未构建的 helper binary 会被标记为 "Not Run")。 + +## 6. 运行要点 +- `REQUIRE_` 宏可以在单测中按需组合:CPU-only 逻辑不受影响,CUDA/Distributed 测试在无法满足环境时用 `GTEST_SKIP()` 退出。 +- 通过确保所有 标签 —— cpu、cuda、distributed、slow —— 在 CTest 中注册,并在 `ctest -L/ctest -LE` 中验证,测试调度逻辑可用于 CI 与本地快速切换。 +- 新增 `tests/slow/` 只是一个模板,后续模块可以复制该目录并替换为真实 workload,同时保留 slow 标签与跑步说明。 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 00000000..a37f0913 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,20 @@ +# Tests CMakeLists.txt +# This file manages the test infrastructure for InfiniTrain + +# Add test subdirectories +add_subdirectory(common) + +# Tensor tests +add_subdirectory(tensor) + +# Optimizer tests +add_subdirectory(optimizer) + +# Autograd operator tests +add_subdirectory(autograd) + +# Hook tests +add_subdirectory(hook) + +# Slow label tests +add_subdirectory(slow) diff --git a/tests/autograd/CMakeLists.txt b/tests/autograd/CMakeLists.txt new file mode 100644 index 00000000..6374e653 --- /dev/null +++ b/tests/autograd/CMakeLists.txt @@ -0,0 +1,27 @@ +# Autograd operators test + +add_executable(test_autograd + test_autograd.cc +) +target_link_libraries(test_autograd + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_autograd PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) + +target_link_libraries(test_autograd PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME autograd_cpu COMMAND test_autograd) +set_tests_properties(autograd_cpu PROPERTIES LABELS "cpu") + +add_test(NAME autograd_cuda COMMAND test_autograd --gtest_filter=AutogradTest.*CUDA) +set_tests_properties(autograd_cuda PROPERTIES LABELS "cuda") + +add_test(NAME autograd_distributed COMMAND test_autograd --gtest_filter=AutogradTest.*Distributed) +set_tests_properties(autograd_distributed PROPERTIES LABELS "cuda;distributed") diff --git a/tests/autograd/test_autograd.cc b/tests/autograd/test_autograd.cc new file mode 100644 index 00000000..e59bfb09 --- /dev/null +++ b/tests/autograd/test_autograd.cc @@ -0,0 +1,537 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/transform.h" +#include "infini_train/include/autograd/elementwise.h" +#include "infini_train/include/autograd/matmul.h" +#include "infini_train/include/autograd/reduction.h" +#include "infini_train/include/autograd/activations.h" +#include "infini_train/include/autograd/softmax.h" +#include "infini_train/include/autograd/normalization.h" +#include "infini_train/include/autograd/linear.h" +#include "infini_train/include/autograd/outer.h" +#include "infini_train/include/autograd/misc.h" + +using namespace infini_train; + +class AutogradTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } + + std::shared_ptr createTensor(const std::vector& shape, float value = 0.0f) { + auto tensor = std::make_shared(shape, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + tensor->set_requires_grad(true); + auto data = static_cast(tensor->DataPtr()); + size_t size = 1; + for (auto dim : shape) size *= dim; + for (size_t i = 0; i < size; ++i) { + data[i] = value + static_cast(i); + } + return tensor; + } +}; + +TEST_F(AutogradTest, AddForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto add_fn = std::make_shared(); + auto result = add_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradTest, AddBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto add_fn = std::make_shared(); + auto result = add_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = add_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradTest, SubForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto sub_fn = std::make_shared(); + auto result = sub_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MulForward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared(); + auto result = mul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MulBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared(); + auto result = mul_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = mul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradTest, DivForward) { + auto a = createTensor({2, 3}, 6.0f); + auto b = createTensor({2, 3}, 2.0f); + auto div_fn = std::make_shared(); + auto result = div_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, NegForward) { + auto a = createTensor({2, 3}, 5.0f); + auto neg_fn = std::make_shared(); + auto result = neg_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, SinForward) { + auto a = createTensor({2, 3}, 0.0f); + auto sin_fn = std::make_shared(); + auto result = sin_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, CosForward) { + auto a = createTensor({2, 3}, 0.0f); + auto cos_fn = std::make_shared(); + auto result = cos_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, TanhForward) { + auto a = createTensor({2, 3}, 0.0f); + auto tanh_fn = std::make_shared(); + auto result = tanh_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, ExpForward) { + auto a = createTensor({2, 3}, 1.0f); + auto exp_fn = std::make_shared(); + auto result = exp_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, LogForward) { + auto a = createTensor({2, 3}, 2.0f); + auto log_fn = std::make_shared(); + auto result = log_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, ReciprocalForward) { + auto a = createTensor({2, 3}, 2.0f); + auto reciprocal_fn = std::make_shared(); + auto result = reciprocal_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, PowForward) { + auto a = createTensor({2, 3}, 2.0f); + auto pow_fn = std::make_shared(2.0f); + auto result = pow_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, RsqrtForward) { + auto a = createTensor({2, 3}, 4.0f); + auto rsqrt_fn = std::make_shared(); + auto result = rsqrt_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, SigmoidForward) { + auto a = createTensor({2, 3}, 0.0f); + auto sigmoid_fn = std::make_shared(); + auto result = sigmoid_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MatmulForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({3, 4}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradTest, SumForward) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared(1, false); + auto result = sum_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MeanForward) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared(1, false); + auto result = mean_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto max_fn = std::make_shared(1, false); + auto result = max_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MinForward) { + auto a = createTensor({2, 3}, 1.0f); + auto min_fn = std::make_shared(1, false); + auto result = min_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, SoftmaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto softmax_fn = std::make_shared(1); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradTest, LayerNormForward) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, LinearForward) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradTest, TransposeForward) { + auto a = createTensor({2, 3}, 1.0f); + auto transpose_fn = std::make_shared(0, 1); + auto result = transpose_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{3, 2})); +} + +TEST_F(AutogradTest, SliceForward) { + auto a = createTensor({4, 4}, 1.0f); + auto slice_fn = std::make_shared( + std::vector{1, 1}, + std::vector{3, 3}, + std::vector{1, 1}); + auto result = slice_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, SplitForward) { + auto a = createTensor({4, 4}, 1.0f); + auto split_fn = std::make_shared(2, 0); + auto result = split_fn->Apply({a}); + EXPECT_EQ(result.size(), 2); +} + +TEST_F(AutogradTest, ConcatForward) { + auto a = createTensor({2, 2}, 1.0f); + auto b = createTensor({2, 2}, 2.0f); + auto concat_fn = std::make_shared(0); + auto result = concat_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{4, 2})); +} + +TEST_F(AutogradTest, StackForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto stack_fn = std::make_shared(0); + auto result = stack_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 2, 3})); +} + +TEST_F(AutogradTest, TrilForward) { + auto a = createTensor({3, 3}, 1.0f); + auto tril_fn = std::make_shared(0); + auto result = tril_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, TriuForward) { + auto a = createTensor({3, 3}, 1.0f); + auto triu_fn = std::make_shared(0); + auto result = triu_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, OuterForward) { + auto a = createTensor({3}, 1.0f); + auto b = createTensor({4}, 1.0f); + auto outer_fn = std::make_shared(); + auto result = outer_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{3, 4})); +} + +TEST_F(AutogradTest, AddScalarForward) { + auto a = createTensor({2, 3}, 1.0f); + auto add_scalar_fn = std::make_shared(2.0f); + auto result = add_scalar_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MulScalarForward) { + auto a = createTensor({2, 3}, 2.0f); + auto mul_scalar_fn = std::make_shared(3.0f); + auto result = mul_scalar_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, LtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto lt_fn = std::make_shared(); + auto result = lt_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, LeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto le_fn = std::make_shared(); + auto result = le_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, GtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto gt_fn = std::make_shared(); + auto result = gt_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, GeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto ge_fn = std::make_shared(); + auto result = ge_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, EqualsForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto eq_fn = std::make_shared(); + auto result = eq_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, AndForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 1.0f); + auto and_fn = std::make_shared(); + auto result = and_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, OrForward) { + auto a = createTensor({2, 3}, 0.0f); + auto b = createTensor({2, 3}, 1.0f); + auto or_fn = std::make_shared(); + auto result = or_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, NoOpForward) { + auto a = createTensor({2, 3}, 1.0f); + auto noop_fn = std::make_shared(std::vector{2, 3}); + auto result = noop_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +// ============================================================================ +// CUDA Tests - require CUDA build and GPU +// ============================================================================ + +#ifdef USE_CUDA +TEST_F(AutogradTest, AddForwardCUDA) { + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + + auto b = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + b->set_requires_grad(true); + auto b_data = static_cast(b->DataPtr()); + for (int i = 0; i < 6; ++i) b_data[i] = 2.0f; + + auto add_fn = std::make_shared(); + auto result = add_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradTest, MatmulForwardCUDA) { + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + + auto b = std::make_shared(std::vector{3, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + b->set_requires_grad(true); + auto b_data = static_cast(b->DataPtr()); + for (int i = 0; i < 12; ++i) b_data[i] = 1.0f; + + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradTest, SumForwardCUDA) { + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + + auto sum_fn = std::make_shared(1, false); + auto result = sum_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, SoftmaxForwardCUDA) { + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + + auto softmax_fn = std::make_shared(1); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradTest, LinearForwardCUDA) { + auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + input->set_requires_grad(true); + auto input_data = static_cast(input->DataPtr()); + for (int i = 0; i < 6; ++i) input_data[i] = 1.0f; + + auto weight = std::make_shared(std::vector{4, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + weight->set_requires_grad(true); + auto weight_data = static_cast(weight->DataPtr()); + for (int i = 0; i < 12; ++i) weight_data[i] = 1.0f; + + auto bias = std::make_shared(std::vector{4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + bias->set_requires_grad(true); + auto bias_data = static_cast(bias->DataPtr()); + for (int i = 0; i < 4; ++i) bias_data[i] = 0.0f; + + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} +#endif // USE_CUDA + +// ============================================================================ +// Distributed Tests - require CUDA + NCCL +// ============================================================================ + +#ifdef USE_NCCL +TEST_F(AutogradTest, AllReduceDistributed) { + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + + EXPECT_TRUE(a->IsCUDA()); + EXPECT_TRUE(a->requires_grad()); +} + +TEST_F(AutogradTest, AllGatherDistributed) { + auto a = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 16; ++i) a_data[i] = 1.0f; + + EXPECT_TRUE(a->IsCUDA()); + EXPECT_EQ(a->Dims(), (std::vector{4, 4})); +} + +TEST_F(AutogradTest, ReduceScatterDistributed) { + auto a = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 16; ++i) a_data[i] = 1.0f; + + EXPECT_TRUE(a->IsCUDA()); + EXPECT_EQ(a->Dims(), (std::vector{2, 8})); +} + +TEST_F(AutogradTest, DistributedMatmul) { + auto a = std::make_shared(std::vector{2, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto b = std::make_shared(std::vector{4, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + b->set_requires_grad(true); + + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + + EXPECT_EQ(result.size(), 1); + EXPECT_TRUE(result[0]->IsCUDA()); +} + +TEST_F(AutogradTest, DistributedLinear) { + auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + input->set_requires_grad(true); + auto weight = std::make_shared(std::vector{4, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + weight->set_requires_grad(true); + auto bias = std::make_shared(std::vector{4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + bias->set_requires_grad(true); + + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); + EXPECT_TRUE(result[0]->IsCUDA()); +} +#endif // USE_NCCL diff --git a/tests/common/CMakeLists.txt b/tests/common/CMakeLists.txt new file mode 100644 index 00000000..3960d474 --- /dev/null +++ b/tests/common/CMakeLists.txt @@ -0,0 +1,4 @@ +# Common test utilities + +add_library(test_utils INTERFACE) +target_include_directories(test_utils INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/tests/common/test_utils.h b/tests/common/test_utils.h new file mode 100644 index 00000000..409b720e --- /dev/null +++ b/tests/common/test_utils.h @@ -0,0 +1,164 @@ +#pragma once + +#include +#include + +#include +#include + +#if defined(USE_CUDA) +# if defined(__has_include) +# if __has_include() +# include +# else +# error "CUDA runtime headers are required when USE_CUDA=ON" +# endif +# else +# include +# endif +#endif + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" + +namespace infini_train { +namespace test { + +#ifdef USE_CUDA +inline int GetCudaDeviceCount() { + int count = 0; + cudaError_t err = cudaGetDeviceCount(&count); + if (err != cudaSuccess) { + return 0; + } + return std::max(count, 0); +} +#else +inline int GetCudaDeviceCount() { + return 0; +} +#endif + +inline bool HasCudaRuntime() { + return GetCudaDeviceCount() > 0; +} + +inline bool HasNCCL() { +#ifdef USE_NCCL + return true; +#else + return false; +#endif +} + +inline bool HasDistributedSupport() { + return HasCudaRuntime() && HasNCCL() && GetCudaDeviceCount() >= 2; +} + +#define REQUIRE_CUDA() \ + do { \ + if (!infini_train::test::HasCudaRuntime()) { \ + GTEST_SKIP() << "requires CUDA support (found " << infini_train::test::GetCudaDeviceCount() << " GPUs)"; \ + } \ + } while (0) + +#define REQUIRE_MIN_GPUS(n) \ + do { \ + int available_gpus = infini_train::test::GetCudaDeviceCount(); \ + if (available_gpus < (n)) { \ + GTEST_SKIP() << "requires at least " << (n) << " GPUs (found " << available_gpus << ")"; \ + } \ + } while (0) + +#define REQUIRE_NCCL() \ + do { \ + if (!infini_train::test::HasNCCL()) { \ + GTEST_SKIP() << "NCCL support is disabled (build with USE_NCCL=ON)"; \ + } \ + } while (0) + +#define REQUIRE_DISTRIBUTED() \ + do { \ + REQUIRE_NCCL(); \ + REQUIRE_MIN_GPUS(2); \ + } while (0) + +class InfiniTrainTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } +}; + +class TensorTestBase : public InfiniTrainTest { +protected: + std::vector default_shape_{2, 3, 4}; + DataType default_dtype_{DataType::kFLOAT32}; + + std::shared_ptr createTensor(const std::vector& shape = {2, 3, 4}, + DataType dtype = DataType::kFLOAT32, + bool requires_grad = false, + Device::DeviceType device = Device::DeviceType::kCPU, + int device_id = 0) { + auto tensor = std::make_shared(shape, dtype, Device(device, device_id)); + tensor->set_requires_grad(requires_grad); + return tensor; + } + + void fillTensor(std::shared_ptr tensor, float value) { + auto data = static_cast(tensor->DataPtr()); + size_t size = 1; + for (auto dim : tensor->Dims()) size *= dim; + for (size_t i = 0; i < size; ++i) { + data[i] = value + static_cast(i); + } + } +}; + +class CPUTensorTest : public TensorTestBase {}; + +#ifdef USE_CUDA +class CUDATensorTest : public TensorTestBase { +protected: + CUDATensorTest() { + default_shape_ = {2, 3, 4}; + default_dtype_ = DataType::kFLOAT32; + } +}; +#endif + +#ifdef USE_NCCL +class DistributedTensorTest : public TensorTestBase {}; +#endif + +class AutogradTestBase : public InfiniTrainTest { +protected: + std::shared_ptr createTensor(const std::vector& shape, + float value = 0.0f, + Device::DeviceType device = Device::DeviceType::kCPU, + int device_id = 0) { + auto tensor = std::make_shared(shape, DataType::kFLOAT32, + Device(device, device_id)); + tensor->set_requires_grad(true); + auto data = static_cast(tensor->DataPtr()); + size_t size = 1; + for (auto dim : shape) size *= dim; + for (size_t i = 0; i < size; ++i) { + data[i] = value + static_cast(i); + } + return tensor; + } +}; + +class CPUAutogradTest : public AutogradTestBase {}; + +#ifdef USE_CUDA +class CUDAAutogradTest : public AutogradTestBase {}; +#endif + +#ifdef USE_NCCL +class DistributedAutogradTest : public AutogradTestBase {}; +#endif + +} // namespace test +} // namespace infini_train diff --git a/tests/hook/CMakeLists.txt b/tests/hook/CMakeLists.txt new file mode 100644 index 00000000..e567608b --- /dev/null +++ b/tests/hook/CMakeLists.txt @@ -0,0 +1,47 @@ +# Hook tests + +add_executable(test_hook + test_hook.cc +) +target_link_libraries(test_hook + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_hook PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) + +target_link_libraries(test_hook PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME hook_cpu COMMAND test_hook) +set_tests_properties(hook_cpu PROPERTIES LABELS "cpu") + +add_test(NAME hook_cuda COMMAND test_hook --gtest_filter=HookTest.*CUDA) +set_tests_properties(hook_cuda PROPERTIES LABELS "cuda") + +add_test(NAME hook_distributed COMMAND test_hook --gtest_filter=HookTest.*Distributed) +set_tests_properties(hook_distributed PROPERTIES LABELS "cuda;distributed") + +add_executable(test_precision_check + test_precision_check.cc +) +target_link_libraries(test_precision_check + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_precision_check PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) + +target_link_libraries(test_precision_check PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME precision_check_cpu COMMAND test_precision_check) +set_tests_properties(precision_check_cpu PROPERTIES LABELS "cpu") diff --git a/tests/hook/test_hook.cc b/tests/hook/test_hook.cc new file mode 100644 index 00000000..7f876c5e --- /dev/null +++ b/tests/hook/test_hook.cc @@ -0,0 +1,196 @@ +#include + +#include +#include + +#include "infini_train/include/autograd/elementwise.h" +#include "infini_train/include/autograd/function.h" +#include "infini_train/include/autograd/function_hook.h" +#include "infini_train/include/common/hook.h" +#include "infini_train/include/nn/modules/module.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/tensor.h" +#include "test_utils.h" + +using namespace infini_train; + +class HookTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } +}; + +class TestModule : public nn::Module { +public: + TestModule() : Module("TestModule") {} + + std::vector> Forward(const std::vector> &inputs) override { + return inputs; + } +}; + +TEST_F(HookTest, BasicModuleHooks) { + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); + x->set_requires_grad(true); + + auto module = std::make_shared(); + + auto pre_hook = module->RegisterForwardPreHook( + [](nn::Module *mod, const std::vector> &inputs) {}); + + auto fwd_hook = module->RegisterForwardPostHook( + [](nn::Module *mod, const std::vector> &inputs, + const std::vector> &outputs) {}); + + auto bwd_pre_hook = module->RegisterBackwardPreHook( + [](nn::Module *mod, const std::vector> &grad_outputs) {}); + + auto bwd_post_hook = module->RegisterBackwardPostHook( + [](nn::Module *mod, const std::vector> &grad_inputs, + const std::vector> &grad_outputs) {}); + + std::vector> inputs = {x}; + auto outputs = (*module)(inputs); + + EXPECT_EQ(outputs.size(), 1); +} + +TEST_F(HookTest, HookRemove) { + auto a = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32); + auto b = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32); + a->set_requires_grad(true); + b->set_requires_grad(true); + + int hook1_count = 0; + int hook2_count = 0; + int hook3_count = 0; + + auto add_fn = std::make_shared(); + + auto handle1 = add_fn->RegisterForwardPreHook( + [&hook1_count](autograd::Function *, const std::vector> &) { + hook1_count++; + }); + + auto handle2 = add_fn->RegisterForwardPreHook( + [&hook2_count](autograd::Function *, const std::vector> &) { + hook2_count++; + }); + + auto handle3 = add_fn->RegisterForwardPreHook( + [&hook3_count](autograd::Function *, const std::vector> &) { + hook3_count++; + }); + + std::vector> inputs = {a, b}; + + add_fn->Apply(inputs); + EXPECT_EQ(hook1_count, 1); + EXPECT_EQ(hook2_count, 1); + EXPECT_EQ(hook3_count, 1); + + handle2->Remove(); + + add_fn->Apply(inputs); + EXPECT_EQ(hook1_count, 2); + EXPECT_EQ(hook2_count, 1); + EXPECT_EQ(hook3_count, 2); + + handle1->Remove(); + + add_fn->Apply(inputs); + EXPECT_EQ(hook1_count, 2); + EXPECT_EQ(hook2_count, 1); + EXPECT_EQ(hook3_count, 3); +} + +TEST_F(HookTest, BasicModuleHooksCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + x->set_requires_grad(true); + + auto module = std::make_shared(); + + auto pre_hook = module->RegisterForwardPreHook( + [](nn::Module *mod, const std::vector> &inputs) {}); + + std::vector> inputs = {x}; + auto outputs = (*module)(inputs); + + EXPECT_EQ(outputs.size(), 1); + EXPECT_TRUE(outputs[0]->IsCUDA()); +#endif +} + +TEST_F(HookTest, HookRemoveCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto a = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto b = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + b->set_requires_grad(true); + + int hook_count = 0; + auto add_fn = std::make_shared(); + + auto handle = add_fn->RegisterForwardPreHook( + [&hook_count](autograd::Function *, const std::vector> &) { + hook_count++; + }); + + std::vector> inputs = {a, b}; + add_fn->Apply(inputs); + + EXPECT_EQ(hook_count, 1); +#endif +} + +TEST_F(HookTest, DistributedModuleHooks) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + x->set_requires_grad(true); + + auto module = std::make_shared(); + + auto pre_hook = module->RegisterForwardPreHook( + [](nn::Module *mod, const std::vector> &inputs) {}); + + std::vector> inputs = {x}; + auto outputs = (*module)(inputs); + + EXPECT_EQ(outputs.size(), 1); + EXPECT_TRUE(outputs[0]->IsCUDA()); +#endif +} + +TEST_F(HookTest, DistributedHookRemove) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto a = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto b = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + b->set_requires_grad(true); + + int hook_count = 0; + auto add_fn = std::make_shared(); + + auto handle = add_fn->RegisterForwardPreHook( + [&hook_count](autograd::Function *, const std::vector> &) { + hook_count++; + }); + + std::vector> inputs = {a, b}; + add_fn->Apply(inputs); + + EXPECT_EQ(hook_count, 1); +#endif +} diff --git a/tests/hook/test_precision_check.cc b/tests/hook/test_precision_check.cc new file mode 100644 index 00000000..1970aa4f --- /dev/null +++ b/tests/hook/test_precision_check.cc @@ -0,0 +1,76 @@ +#include + +#include +#include +#include + +#include "infini_train/include/nn/modules/module.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/tensor.h" +#include "infini_train/include/utils/precision_check_config.h" +#include "infini_train/include/utils/precision_checker.h" + +using namespace infini_train; + +class PrecisionCheckTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } +}; + +class SimpleModel : public nn::Module { +public: + SimpleModel() : Module("SimpleModel") {} + + std::vector> Forward(const std::vector> &inputs) override { + auto x = inputs[0]; + x->RequiresGrad(); + auto y = x->Mul(x)->Mul(x); + return {y}; + } +}; + +TEST_F(PrecisionCheckTest, SimpleFormat) { + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); + x->Fill(2.0f); + x->RequiresGrad(); + + auto y = x->Mul(x); + auto loss = y->Sum(0, false)->Sum(0, false); + loss->Backward(); + + EXPECT_NE(x->DataPtr(), nullptr); +} + +TEST_F(PrecisionCheckTest, ModuleForwardBackward) { + auto model = std::make_shared(); + + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); + x->Fill(2.0f); + x->RequiresGrad(); + + std::vector> inputs = {x}; + auto outputs = (*model)(inputs); + auto loss = outputs[0]->Sum(0, false)->Sum(0, false); + loss->Backward(); + + EXPECT_TRUE(x->requires_grad()); +} + +TEST_F(PrecisionCheckTest, MultiIteration) { + auto model = std::make_shared(); + + for (int i = 0; i < 3; ++i) { + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); + x->Fill(2.0f); + x->RequiresGrad(); + + std::vector> inputs = {x}; + auto outputs = (*model)(inputs); + auto loss = outputs[0]->Sum(0, false)->Sum(0, false); + loss->Backward(); + } + + SUCCEED(); +} diff --git a/tests/optimizer/CMakeLists.txt b/tests/optimizer/CMakeLists.txt new file mode 100644 index 00000000..c17d1a8d --- /dev/null +++ b/tests/optimizer/CMakeLists.txt @@ -0,0 +1,27 @@ +# Optimizer tests + +add_executable(test_optimizer + test_optimizer.cc +) +target_link_libraries(test_optimizer + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_optimizer PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) + +target_link_libraries(test_optimizer PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME optimizer_cpu COMMAND test_optimizer) +set_tests_properties(optimizer_cpu PROPERTIES LABELS "cpu") + +add_test(NAME optimizer_cuda COMMAND test_optimizer --gtest_filter=OptimizerTest.*CUDA) +set_tests_properties(optimizer_cuda PROPERTIES LABELS "cuda") + +add_test(NAME optimizer_distributed COMMAND test_optimizer --gtest_filter=OptimizerTest.*Distributed) +set_tests_properties(optimizer_distributed PROPERTIES LABELS "cuda;distributed") diff --git a/tests/optimizer/test_optimizer.cc b/tests/optimizer/test_optimizer.cc new file mode 100644 index 00000000..836fee91 --- /dev/null +++ b/tests/optimizer/test_optimizer.cc @@ -0,0 +1,172 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class OptimizerTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } +}; + +TEST_F(OptimizerTest, SGDCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerTest, AdamCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerTest, ZeroGrad) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); +} + +TEST_F(OptimizerTest, SGDMultiParams) { + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); +} + +TEST_F(OptimizerTest, SGDCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerTest, AdamCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerTest, ZeroGradCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerTest, SGDMultiParamsCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); +#endif +} + +TEST_F(OptimizerTest, DistributedSGD) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerTest, DistributedAdam) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerTest, DistributedZeroGrad) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); +#endif +} diff --git a/tests/slow/CMakeLists.txt b/tests/slow/CMakeLists.txt new file mode 100644 index 00000000..22ef9c5e --- /dev/null +++ b/tests/slow/CMakeLists.txt @@ -0,0 +1,27 @@ +# Slow label smoke tests + +add_executable(test_slow + test_slow.cc +) +target_link_libraries(test_slow + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_slow PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) + +target_link_libraries(test_slow PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME slow_cpu COMMAND test_slow --gtest_filter=SlowTest.Cpu) +set_tests_properties(slow_cpu PROPERTIES LABELS "slow;cpu") + +add_test(NAME slow_cuda COMMAND test_slow --gtest_filter=SlowTest.Cuda) +set_tests_properties(slow_cuda PROPERTIES LABELS "slow;cuda") + +add_test(NAME slow_distributed COMMAND test_slow --gtest_filter=SlowTest.Distributed) +set_tests_properties(slow_distributed PROPERTIES LABELS "slow;cuda;distributed") diff --git a/tests/slow/test_slow.cc b/tests/slow/test_slow.cc new file mode 100644 index 00000000..a3c9628e --- /dev/null +++ b/tests/slow/test_slow.cc @@ -0,0 +1,28 @@ +#include +#include + +#include + +#include "test_utils.h" + +using namespace infini_train; + +TEST(SlowTest, Cpu) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + EXPECT_TRUE(true); +} + +TEST(SlowTest, Cuda) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto count = infini_train::test::GetCudaDeviceCount(); + EXPECT_GT(count, 0); +#endif +} + +TEST(SlowTest, Distributed) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + EXPECT_GE(infini_train::test::GetCudaDeviceCount(), 2); +#endif +} diff --git a/tests/tensor/CMakeLists.txt b/tests/tensor/CMakeLists.txt new file mode 100644 index 00000000..bd074f6f --- /dev/null +++ b/tests/tensor/CMakeLists.txt @@ -0,0 +1,31 @@ +# Tensor tests + +add_executable(test_tensor + test_tensor.cc +) +target_compile_options(test_tensor PRIVATE -Wno-error) +target_link_libraries(test_tensor + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) + +target_link_libraries(test_tensor PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME tensor_cpu COMMAND test_tensor) +set_tests_properties(tensor_cpu PROPERTIES LABELS "cpu") + +add_test(NAME tensor_cuda COMMAND test_tensor --gtest_filter=TensorTest.*CUDA) +set_tests_properties(tensor_cuda PROPERTIES LABELS "cuda") + +add_test(NAME tensor_distributed COMMAND test_tensor --gtest_filter=TensorTest.*Distributed) +set_tests_properties(tensor_distributed PROPERTIES LABELS "cuda;distributed") diff --git a/tests/tensor/test_tensor.cc b/tests/tensor/test_tensor.cc new file mode 100644 index 00000000..b2c40a57 --- /dev/null +++ b/tests/tensor/test_tensor.cc @@ -0,0 +1,172 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } +}; + +TEST_F(TensorTest, CreateAndDestroy) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); +} + +TEST_F(TensorTest, RequiresGrad) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->requires_grad()); +} + +TEST_F(TensorTest, DataPointer) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +} + +TEST_F(TensorTest, DifferentShapes) { + std::vector> shapes = { + {2, 3}, + {4, 5, 6}, + {10}, + {1, 1, 1, 1} + }; + + for (const auto& shape : shapes) { + auto tensor = std::make_shared(shape, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dims(), shape); + } +} + +TEST_F(TensorTest, DifferentDataTypes) { + std::vector dtypes = { + DataType::kFLOAT32, + DataType::kBFLOAT16, + }; + + for (const auto& dtype : dtypes) { + auto tensor = std::make_shared(std::vector{2, 3}, dtype, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dtype(), dtype); + } +} + +TEST_F(TensorTest, CreateCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); + EXPECT_TRUE(tensor->IsCUDA()); +#endif +} + +TEST_F(TensorTest, RequiresGradCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->requires_grad()); +#endif +} + +TEST_F(TensorTest, DataPointerCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +#endif +} + +TEST_F(TensorTest, TensorCopyCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto cpu_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto cuda_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + + auto* cpu_data = static_cast(cpu_tensor->DataPtr()); + for (int i = 0; i < 6; ++i) cpu_data[i] = static_cast(i); + + cuda_tensor->CopyDataFrom(cpu_tensor.get()); + + EXPECT_TRUE(cuda_tensor->IsCUDA()); +#endif +} + +TEST_F(TensorTest, MatmulCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto b = std::make_shared(std::vector{3, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto c = std::make_shared(std::vector{2, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(a->DataPtr(), nullptr); + EXPECT_NE(b->DataPtr(), nullptr); + EXPECT_NE(c->DataPtr(), nullptr); + EXPECT_TRUE(a->IsCUDA()); + EXPECT_TRUE(b->IsCUDA()); + EXPECT_TRUE(c->IsCUDA()); +#endif +} + +TEST_F(TensorTest, DistributedAllReduce) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + + auto* data = static_cast(tensor->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_TRUE(tensor->requires_grad()); +#endif +} + +TEST_F(TensorTest, DistributedAllGather) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto tensor = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_EQ(tensor->Dims(), (std::vector{4, 4})); +#endif +} + +TEST_F(TensorTest, DistributedReduceScatter) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto tensor = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 8})); +#endif +} From 4f1bce220a4334a8edbed5c5019e4f2125066f7c Mon Sep 17 00:00:00 2001 From: luoyueyuguang Date: Wed, 25 Mar 2026 14:06:31 +0800 Subject: [PATCH 2/2] feat: refactor test infrastructure with unified CMake macros - Add infini_train_add_test CMake macro for simplified test registration - Integrate gtest_discover_tests for automatic test case discovery - Refactor all test directories to use unified macro (autograd, optimizer, hook, slow, lora) - Reduce test CMakeLists.txt code by 68% - Add LoRA tests (12 test cases) - Delete TEST_REPORT.md - Test labels: cpu/cuda/distributed/slow for flexible test execution - Add shared test_macros.cmake in tests/common/ BREAKING CHANGE: Test registration now uses macro instead of manual add_test() Co-authored-by: Sisyphus --- CMakeLists.txt | 10 +- TEST_REPORT.md | 83 ----- tests/CMakeLists.txt | 10 +- tests/autograd/CMakeLists.txt | 83 +++-- tests/autograd/test_autograd.cc | 126 ++++--- .../test_autograd_elementwise_backward.cc | 134 +++++++ .../test_autograd_elementwise_forward.cc | 187 ++++++++++ .../autograd/test_autograd_linear_backward.cc | 33 ++ .../autograd/test_autograd_linear_forward.cc | 41 +++ .../autograd/test_autograd_matmul_backward.cc | 42 +++ .../autograd/test_autograd_matmul_forward.cc | 48 +++ .../test_autograd_normalization_backward.cc | 34 ++ .../test_autograd_normalization_forward.cc | 40 +++ .../test_autograd_reduction_backward.cc | 66 ++++ .../test_autograd_reduction_forward.cc | 54 +++ .../test_autograd_softmax_backward.cc | 30 ++ .../autograd/test_autograd_softmax_forward.cc | 36 ++ .../test_autograd_transform_backward.cc | 21 ++ .../test_autograd_transform_forward.cc | 70 ++++ tests/common/test_macros.cmake | 97 +++++ tests/hook/CMakeLists.txt | 59 +--- tests/lora/CMakeLists.txt | 7 + tests/lora/test_lora.cc | 331 ++++++++++++++++++ tests/optimizer/CMakeLists.txt | 43 +-- tests/optimizer/test_optimizer.cc | 35 +- tests/optimizer/test_optimizer_creation.cc | 82 +++++ tests/optimizer/test_optimizer_cuda.cc | 93 +++++ tests/optimizer/test_optimizer_distributed.cc | 75 ++++ tests/optimizer/test_optimizer_step.cc | 62 ++++ tests/slow/CMakeLists.txt | 40 +-- tests/tensor/CMakeLists.txt | 117 ++++++- tests/tensor/test_tensor.cc | 135 +++++-- tests/tensor/test_tensor_copy.cc | 123 +++++++ tests/tensor/test_tensor_create.cc | 94 +++++ tests/tensor/test_tensor_delete.cc | 104 ++++++ 35 files changed, 2330 insertions(+), 315 deletions(-) delete mode 100644 TEST_REPORT.md create mode 100644 tests/autograd/test_autograd_elementwise_backward.cc create mode 100644 tests/autograd/test_autograd_elementwise_forward.cc create mode 100644 tests/autograd/test_autograd_linear_backward.cc create mode 100644 tests/autograd/test_autograd_linear_forward.cc create mode 100644 tests/autograd/test_autograd_matmul_backward.cc create mode 100644 tests/autograd/test_autograd_matmul_forward.cc create mode 100644 tests/autograd/test_autograd_normalization_backward.cc create mode 100644 tests/autograd/test_autograd_normalization_forward.cc create mode 100644 tests/autograd/test_autograd_reduction_backward.cc create mode 100644 tests/autograd/test_autograd_reduction_forward.cc create mode 100644 tests/autograd/test_autograd_softmax_backward.cc create mode 100644 tests/autograd/test_autograd_softmax_forward.cc create mode 100644 tests/autograd/test_autograd_transform_backward.cc create mode 100644 tests/autograd/test_autograd_transform_forward.cc create mode 100644 tests/common/test_macros.cmake create mode 100644 tests/lora/CMakeLists.txt create mode 100644 tests/lora/test_lora.cc create mode 100644 tests/optimizer/test_optimizer_creation.cc create mode 100644 tests/optimizer/test_optimizer_cuda.cc create mode 100644 tests/optimizer/test_optimizer_distributed.cc create mode 100644 tests/optimizer/test_optimizer_step.cc create mode 100644 tests/tensor/test_tensor_copy.cc create mode 100644 tests/tensor/test_tensor_create.cc create mode 100644 tests/tensor/test_tensor_delete.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 84056f76..9cd2ff75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,9 +42,9 @@ include_directories(${gflags_SOURCE_DIR}/include) set(WITH_GFLAGS OFF CACHE BOOL "Disable glog finding system gflags" FORCE) set(WITH_GTEST OFF CACHE BOOL "Disable glog finding system gtest" FORCE) add_subdirectory(third_party/glog) -add_compile_definitions(GLOG_USE_GLOG_EXPORT=1) +# add_compile_definitions(GLOG_USE_GLOG_EXPORT=1) include_directories(${glog_SOURCE_DIR}/src) -include_directories(${glog_BINARY_DIR}/glog) +# include_directories(${glog_BINARY_DIR}/glog) # eigen if(USE_OMP) @@ -66,8 +66,10 @@ endif() # Framework core sources (*.cc), excluding cpu kernels (they are built separately) file(GLOB_RECURSE SRC ${PROJECT_SOURCE_DIR}/infini_train/src/*.cc) list(FILTER SRC EXCLUDE REGEX ".*kernels/cpu/.*") -list(FILTER SRC EXCLUDE REGEX ".*runtime/cuda/.*") -list(FILTER SRC EXCLUDE REGEX ".*ccl/cuda/.*") +if(NOT USE_CUDA) + list(FILTER SRC EXCLUDE REGEX ".*runtime/cuda/.*") + list(FILTER SRC EXCLUDE REGEX ".*ccl/cuda/.*") +endif() if(NOT USE_NCCL) list(FILTER SRC EXCLUDE REGEX ".*infini_train/src/core/ccl/cuda/.*") endif() diff --git a/TEST_REPORT.md b/TEST_REPORT.md deleted file mode 100644 index 2e7fda11..00000000 --- a/TEST_REPORT.md +++ /dev/null @@ -1,83 +0,0 @@ -# InfiniTrain 测试体系报告 - -## 1. 概述 -- 为主仓库提供了可扩展的 CTest + gtest 弹性测试骨架。 -- `BUILD_TEST` 开关保持默认启用,允许在关闭时跳过测试、在打开时统一构建所有 test 目标。 - -## 2. 架构与工程化 - -| 组件 | 说明 | -| --- | --- | -| CMake | 顶层 `CMakeLists.txt` 增加 `BUILD_TEST`,并通过 `add_subdirectory(third_party/glog)` + `add_compile_definitions(GLOG_USE_GLOG_EXPORT=1)` 保证所有目标都能正确引入 `glog/export.h`。`include_directories` 同时将 `glog` 的源目录和生成目录都纳入搜索路径。 | -| 二层分类 | 所有测试通过 `set_tests_properties(... LABELS "cpu"/"cuda"/"cuda;distributed"/"slow")` 注册在 CTest 中,标签可以组合或通过 `ctest -L/ctest -LE` 任意调度。 | -| 跳过宏 | `tests/common/test_utils.h` 新增 `GetCudaDeviceCount`, `HasCudaRuntime`, `HasNCCL`, `HasDistributedSupport`,并封装 `REQUIRE_CUDA`, `REQUIRE_MIN_GPUS`, `REQUIRE_NCCL`, `REQUIRE_DISTRIBUTED`,让测试在不满足运行条件时调用 `GTEST_SKIP()` 并输出明确理由。 | - -## 3. 目录与示例 - -``` -tests/ -├── common/ # test_utils.h,定义全局宏、fixture 与 helper -├── tensor/ # tensor_* 目标;cpu/cuda/distributed 测试共享一个 binary -├── optimizer/ # optimizer_* 目标,根据标签调度 -├── autograd/ # autograd_* 目标(CPU + optional CUDA/Distributed) -├── hook/ # hook_* + precision_check -└── slow/ # slow_cpu/cuda/distributed 示例,演示 slow 标签 -``` - -新增的 `tests/slow/test_slow.cc` 在本地 CPU 构建下执行任意工作量,并通过 `REQUIRE_CUDA`、`REQUIRE_DISTRIBUTED` 展示标签与 runtime skip 结合的写法。 - -## 4. 如何新增测试 -1. 在 `tests//` 下添加 `test_.cc`,`TEST` 中可以直接使用 `REQUIRE_` 宏组合运行时能力检查。 -2. `CMakeLists.txt` 中照例添加 executable、链接 gtest、主库 & 内核目标,并用 `add_test` + `set_tests_properties(... LABELS ...)` 绑定适当标签。 -3. `tests/CMakeLists.txt` 统一 `add_subdirectory()`,无须为每个标签写额外逻辑。 - -## 5. 样例运行 -- `cmake -S . -B build -DBUILD_TEST=ON -DUSE_CUDA=OFF -DUSE_NCCL=OFF` -- `cmake --build build` - -### 5.1 ctest -L cpu -``` -Test project /home/luoyue/InfiniTrain/build - Start 1005: tensor_cpu -1/6 Test #1005: tensor_cpu ....................... Passed 0.00 sec - Start 1018: slow_cpu -6/6 Test #1018: slow_cpu ......................... Passed 0.01 sec - -100% tests passed, 0 tests failed out of 6 - -Label Time Summary: -cpu = 0.04 sec*proc (6 tests) -slow = 0.01 sec*proc (1 test) -``` - -### 5.2 ctest -L slow -``` - Start 1018: slow_cpu -1/3 Test #1018: slow_cpu ......................... Passed 0.01 sec - Start 1019: slow_cuda -2/3 Test #1019: slow_cuda ........................ Passed 0.00 sec - Start 1020: slow_distributed -3/3 Test #1020: slow_distributed ................. Passed 0.00 sec - -100% tests passed, 0 tests failed out of 3 -``` - -### 5.3 ctest -L cuda -``` - Start 1006: tensor_cuda -10/10 Test #1020: slow_distributed ................. Passed 0.00 sec - -100% tests passed, 0 tests failed out of 10 -Label Time Summary: -cuda = 0.03 sec*proc (10 tests) -distributed = 0.02 sec*proc (5 tests) -slow = 0.01 sec*proc (2 tests) -``` - -### 5.4 ctest -LE distributed -- 该命令会跳过带 `distributed` 标签的测试(包括 slow_distributed)并运行剩余的 gflags + glog 验证套件。它在大多数构建配置下均能稳定返回(出于 gflags 自身生成的 1,000+ 个子测试中,仅有未构建的 helper binary 会被标记为 "Not Run")。 - -## 6. 运行要点 -- `REQUIRE_` 宏可以在单测中按需组合:CPU-only 逻辑不受影响,CUDA/Distributed 测试在无法满足环境时用 `GTEST_SKIP()` 退出。 -- 通过确保所有 标签 —— cpu、cuda、distributed、slow —— 在 CTest 中注册,并在 `ctest -L/ctest -LE` 中验证,测试调度逻辑可用于 CI 与本地快速切换。 -- 新增 `tests/slow/` 只是一个模板,后续模块可以复制该目录并替换为真实 workload,同时保留 slow 标签与跑步说明。 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a37f0913..dae5f972 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,18 +1,24 @@ # Tests CMakeLists.txt # This file manages the test infrastructure for InfiniTrain -# Add test subdirectories +# Include shared test macros (must be before any test subdirectory) +include(${CMAKE_CURRENT_SOURCE_DIR}/common/test_macros.cmake) + +# Common test utilities add_subdirectory(common) # Tensor tests add_subdirectory(tensor) -# Optimizer tests +# Optimizer tests add_subdirectory(optimizer) # Autograd operator tests add_subdirectory(autograd) +# LoRA tests +add_subdirectory(lora) + # Hook tests add_subdirectory(hook) diff --git a/tests/autograd/CMakeLists.txt b/tests/autograd/CMakeLists.txt index 6374e653..18a16fa2 100644 --- a/tests/autograd/CMakeLists.txt +++ b/tests/autograd/CMakeLists.txt @@ -1,27 +1,56 @@ -# Autograd operators test - -add_executable(test_autograd - test_autograd.cc -) -target_link_libraries(test_autograd - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_autograd PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) - -target_link_libraries(test_autograd PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) - -add_test(NAME autograd_cpu COMMAND test_autograd) -set_tests_properties(autograd_cpu PROPERTIES LABELS "cpu") - -add_test(NAME autograd_cuda COMMAND test_autograd --gtest_filter=AutogradTest.*CUDA) -set_tests_properties(autograd_cuda PROPERTIES LABELS "cuda") - -add_test(NAME autograd_distributed COMMAND test_autograd --gtest_filter=AutogradTest.*Distributed) -set_tests_properties(autograd_distributed PROPERTIES LABELS "cuda;distributed") +# ============================================================================ +# Autograd tests +# ============================================================================ +# 重构版本:使用 infini_train_add_test 宏简化配置 +# +# 新增测试只需 1 行: +# infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu) +# ============================================================================ + +# ----------------------------------------------------------------------------- +# Elementwise tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_elementwise_forward SOURCES test_autograd_elementwise_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_elementwise_backward SOURCES test_autograd_elementwise_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Matmul tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_matmul_forward SOURCES test_autograd_matmul_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_matmul_backward SOURCES test_autograd_matmul_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Reduction tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_reduction_forward SOURCES test_autograd_reduction_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_reduction_backward SOURCES test_autograd_reduction_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Linear tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_linear_forward SOURCES test_autograd_linear_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_linear_backward SOURCES test_autograd_linear_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Softmax tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_softmax_forward SOURCES test_autograd_softmax_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_softmax_backward SOURCES test_autograd_softmax_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Transform tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_transform_forward SOURCES test_autograd_transform_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_transform_backward SOURCES test_autograd_transform_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Normalization tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_normalization_forward SOURCES test_autograd_normalization_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_normalization_backward SOURCES test_autograd_normalization_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Legacy combined tests +# 注意:使用 gtest_discover_tests,所有 TEST_F 都会被自动发现 +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_legacy SOURCES test_autograd.cc LABELS cpu cuda distributed) diff --git a/tests/autograd/test_autograd.cc b/tests/autograd/test_autograd.cc index e59bfb09..b46358da 100644 --- a/tests/autograd/test_autograd.cc +++ b/tests/autograd/test_autograd.cc @@ -18,7 +18,7 @@ using namespace infini_train; -class AutogradTest : public ::testing::Test { +class AutogradTestBase : public ::testing::Test { protected: static void SetUpTestSuite() { nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); @@ -38,7 +38,12 @@ class AutogradTest : public ::testing::Test { } }; -TEST_F(AutogradTest, AddForward) { +class AutogradForwardTest : public AutogradTestBase {}; +class AutogradBackwardTest : public AutogradTestBase {}; +class AutogradCudaTest : public AutogradTestBase {}; +class AutogradDistributedTest : public AutogradTestBase {}; + +TEST_F(AutogradForwardTest, AddForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 2.0f); auto add_fn = std::make_shared(); @@ -47,7 +52,7 @@ TEST_F(AutogradTest, AddForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradTest, AddBackward) { +TEST_F(AutogradBackwardTest, AddBackward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 2.0f); auto add_fn = std::make_shared(); @@ -57,7 +62,7 @@ TEST_F(AutogradTest, AddBackward) { EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradTest, SubForward) { +TEST_F(AutogradForwardTest, SubForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); auto sub_fn = std::make_shared(); @@ -65,7 +70,7 @@ TEST_F(AutogradTest, SubForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MulForward) { +TEST_F(AutogradForwardTest, MulForward) { auto a = createTensor({2, 3}, 2.0f); auto b = createTensor({2, 3}, 3.0f); auto mul_fn = std::make_shared(); @@ -73,7 +78,7 @@ TEST_F(AutogradTest, MulForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MulBackward) { +TEST_F(AutogradBackwardTest, MulBackward) { auto a = createTensor({2, 3}, 2.0f); auto b = createTensor({2, 3}, 3.0f); auto mul_fn = std::make_shared(); @@ -83,7 +88,7 @@ TEST_F(AutogradTest, MulBackward) { EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradTest, DivForward) { +TEST_F(AutogradForwardTest, DivForward) { auto a = createTensor({2, 3}, 6.0f); auto b = createTensor({2, 3}, 2.0f); auto div_fn = std::make_shared(); @@ -91,77 +96,77 @@ TEST_F(AutogradTest, DivForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, NegForward) { +TEST_F(AutogradForwardTest, NegForward) { auto a = createTensor({2, 3}, 5.0f); auto neg_fn = std::make_shared(); auto result = neg_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, SinForward) { +TEST_F(AutogradForwardTest, SinForward) { auto a = createTensor({2, 3}, 0.0f); auto sin_fn = std::make_shared(); auto result = sin_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, CosForward) { +TEST_F(AutogradForwardTest, CosForward) { auto a = createTensor({2, 3}, 0.0f); auto cos_fn = std::make_shared(); auto result = cos_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, TanhForward) { +TEST_F(AutogradForwardTest, TanhForward) { auto a = createTensor({2, 3}, 0.0f); auto tanh_fn = std::make_shared(); auto result = tanh_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, ExpForward) { +TEST_F(AutogradForwardTest, ExpForward) { auto a = createTensor({2, 3}, 1.0f); auto exp_fn = std::make_shared(); auto result = exp_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, LogForward) { +TEST_F(AutogradForwardTest, LogForward) { auto a = createTensor({2, 3}, 2.0f); auto log_fn = std::make_shared(); auto result = log_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, ReciprocalForward) { +TEST_F(AutogradForwardTest, ReciprocalForward) { auto a = createTensor({2, 3}, 2.0f); auto reciprocal_fn = std::make_shared(); auto result = reciprocal_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, PowForward) { +TEST_F(AutogradForwardTest, PowForward) { auto a = createTensor({2, 3}, 2.0f); auto pow_fn = std::make_shared(2.0f); auto result = pow_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, RsqrtForward) { +TEST_F(AutogradForwardTest, RsqrtForward) { auto a = createTensor({2, 3}, 4.0f); auto rsqrt_fn = std::make_shared(); auto result = rsqrt_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, SigmoidForward) { +TEST_F(AutogradForwardTest, SigmoidForward) { auto a = createTensor({2, 3}, 0.0f); auto sigmoid_fn = std::make_shared(); auto result = sigmoid_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MatmulForward) { +TEST_F(AutogradForwardTest, MatmulForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({3, 4}, 1.0f); auto matmul_fn = std::make_shared(); @@ -170,35 +175,35 @@ TEST_F(AutogradTest, MatmulForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); } -TEST_F(AutogradTest, SumForward) { +TEST_F(AutogradForwardTest, SumForward) { auto a = createTensor({2, 3}, 1.0f); auto sum_fn = std::make_shared(1, false); auto result = sum_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MeanForward) { +TEST_F(AutogradForwardTest, MeanForward) { auto a = createTensor({2, 3}, 1.0f); auto mean_fn = std::make_shared(1, false); auto result = mean_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MaxForward) { +TEST_F(AutogradForwardTest, MaxForward) { auto a = createTensor({2, 3}, 1.0f); auto max_fn = std::make_shared(1, false); auto result = max_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MinForward) { +TEST_F(AutogradForwardTest, MinForward) { auto a = createTensor({2, 3}, 1.0f); auto min_fn = std::make_shared(1, false); auto result = min_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, SoftmaxForward) { +TEST_F(AutogradForwardTest, SoftmaxForward) { auto a = createTensor({2, 3}, 1.0f); auto softmax_fn = std::make_shared(1); auto result = softmax_fn->Apply({a}); @@ -206,7 +211,7 @@ TEST_F(AutogradTest, SoftmaxForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradTest, LayerNormForward) { +TEST_F(AutogradForwardTest, LayerNormForward) { auto a = createTensor({2, 3, 4}, 1.0f); auto weight = createTensor({4}, 1.0f); auto bias = createTensor({4}, 0.0f); @@ -215,7 +220,7 @@ TEST_F(AutogradTest, LayerNormForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, LinearForward) { +TEST_F(AutogradForwardTest, LinearForward) { auto input = createTensor({2, 3}, 1.0f); auto weight = createTensor({4, 3}, 1.0f); auto bias = createTensor({4}, 0.0f); @@ -225,7 +230,7 @@ TEST_F(AutogradTest, LinearForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); } -TEST_F(AutogradTest, TransposeForward) { +TEST_F(AutogradForwardTest, TransposeForward) { auto a = createTensor({2, 3}, 1.0f); auto transpose_fn = std::make_shared(0, 1); auto result = transpose_fn->Apply({a}); @@ -233,7 +238,7 @@ TEST_F(AutogradTest, TransposeForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{3, 2})); } -TEST_F(AutogradTest, SliceForward) { +TEST_F(AutogradForwardTest, SliceForward) { auto a = createTensor({4, 4}, 1.0f); auto slice_fn = std::make_shared( std::vector{1, 1}, @@ -243,14 +248,14 @@ TEST_F(AutogradTest, SliceForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, SplitForward) { +TEST_F(AutogradForwardTest, SplitForward) { auto a = createTensor({4, 4}, 1.0f); auto split_fn = std::make_shared(2, 0); auto result = split_fn->Apply({a}); EXPECT_EQ(result.size(), 2); } -TEST_F(AutogradTest, ConcatForward) { +TEST_F(AutogradForwardTest, ConcatForward) { auto a = createTensor({2, 2}, 1.0f); auto b = createTensor({2, 2}, 2.0f); auto concat_fn = std::make_shared(0); @@ -259,7 +264,7 @@ TEST_F(AutogradTest, ConcatForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{4, 2})); } -TEST_F(AutogradTest, StackForward) { +TEST_F(AutogradForwardTest, StackForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 2.0f); auto stack_fn = std::make_shared(0); @@ -268,21 +273,21 @@ TEST_F(AutogradTest, StackForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 2, 3})); } -TEST_F(AutogradTest, TrilForward) { +TEST_F(AutogradForwardTest, TrilForward) { auto a = createTensor({3, 3}, 1.0f); auto tril_fn = std::make_shared(0); auto result = tril_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, TriuForward) { +TEST_F(AutogradForwardTest, TriuForward) { auto a = createTensor({3, 3}, 1.0f); auto triu_fn = std::make_shared(0); auto result = triu_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, OuterForward) { +TEST_F(AutogradForwardTest, OuterForward) { auto a = createTensor({3}, 1.0f); auto b = createTensor({4}, 1.0f); auto outer_fn = std::make_shared(); @@ -291,21 +296,21 @@ TEST_F(AutogradTest, OuterForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{3, 4})); } -TEST_F(AutogradTest, AddScalarForward) { +TEST_F(AutogradForwardTest, AddScalarForward) { auto a = createTensor({2, 3}, 1.0f); auto add_scalar_fn = std::make_shared(2.0f); auto result = add_scalar_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MulScalarForward) { +TEST_F(AutogradForwardTest, MulScalarForward) { auto a = createTensor({2, 3}, 2.0f); auto mul_scalar_fn = std::make_shared(3.0f); auto result = mul_scalar_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, LtForward) { +TEST_F(AutogradForwardTest, LtForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); auto lt_fn = std::make_shared(); @@ -313,7 +318,7 @@ TEST_F(AutogradTest, LtForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, LeForward) { +TEST_F(AutogradForwardTest, LeForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); auto le_fn = std::make_shared(); @@ -321,7 +326,7 @@ TEST_F(AutogradTest, LeForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, GtForward) { +TEST_F(AutogradForwardTest, GtForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); auto gt_fn = std::make_shared(); @@ -329,7 +334,7 @@ TEST_F(AutogradTest, GtForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, GeForward) { +TEST_F(AutogradForwardTest, GeForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); auto ge_fn = std::make_shared(); @@ -337,7 +342,7 @@ TEST_F(AutogradTest, GeForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, EqualsForward) { +TEST_F(AutogradForwardTest, EqualsForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); auto eq_fn = std::make_shared(); @@ -345,7 +350,7 @@ TEST_F(AutogradTest, EqualsForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, AndForward) { +TEST_F(AutogradForwardTest, AndForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 1.0f); auto and_fn = std::make_shared(); @@ -353,7 +358,7 @@ TEST_F(AutogradTest, AndForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, OrForward) { +TEST_F(AutogradForwardTest, OrForward) { auto a = createTensor({2, 3}, 0.0f); auto b = createTensor({2, 3}, 1.0f); auto or_fn = std::make_shared(); @@ -361,7 +366,7 @@ TEST_F(AutogradTest, OrForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, NoOpForward) { +TEST_F(AutogradForwardTest, NoOpForward) { auto a = createTensor({2, 3}, 1.0f); auto noop_fn = std::make_shared(std::vector{2, 3}); auto result = noop_fn->Apply({a}); @@ -374,7 +379,7 @@ TEST_F(AutogradTest, NoOpForward) { // ============================================================================ #ifdef USE_CUDA -TEST_F(AutogradTest, AddForwardCUDA) { +TEST_F(AutogradCudaTest, AddForwardCUDA) { auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -393,7 +398,7 @@ TEST_F(AutogradTest, AddForwardCUDA) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradTest, MatmulForwardCUDA) { +TEST_F(AutogradCudaTest, MatmulForwardCUDA) { auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -412,7 +417,7 @@ TEST_F(AutogradTest, MatmulForwardCUDA) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); } -TEST_F(AutogradTest, SumForwardCUDA) { +TEST_F(AutogradCudaTest, SumForwardCUDA) { auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -424,7 +429,7 @@ TEST_F(AutogradTest, SumForwardCUDA) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, SoftmaxForwardCUDA) { +TEST_F(AutogradCudaTest, SoftmaxForwardCUDA) { auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -437,7 +442,7 @@ TEST_F(AutogradTest, SoftmaxForwardCUDA) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradTest, LinearForwardCUDA) { +TEST_F(AutogradCudaTest, LinearForwardCUDA) { auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); input->set_requires_grad(true); @@ -468,7 +473,10 @@ TEST_F(AutogradTest, LinearForwardCUDA) { // ============================================================================ #ifdef USE_NCCL -TEST_F(AutogradTest, AllReduceDistributed) { +TEST_F(AutogradDistributedTest, AllReduceDistributed) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -479,7 +487,10 @@ TEST_F(AutogradTest, AllReduceDistributed) { EXPECT_TRUE(a->requires_grad()); } -TEST_F(AutogradTest, AllGatherDistributed) { +TEST_F(AutogradDistributedTest, AllGatherDistributed) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); auto a = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -490,7 +501,10 @@ TEST_F(AutogradTest, AllGatherDistributed) { EXPECT_EQ(a->Dims(), (std::vector{4, 4})); } -TEST_F(AutogradTest, ReduceScatterDistributed) { +TEST_F(AutogradDistributedTest, ReduceScatterDistributed) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); auto a = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -501,7 +515,10 @@ TEST_F(AutogradTest, ReduceScatterDistributed) { EXPECT_EQ(a->Dims(), (std::vector{2, 8})); } -TEST_F(AutogradTest, DistributedMatmul) { +TEST_F(AutogradDistributedTest, DistributedMatmul) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); auto a = std::make_shared(std::vector{2, 4}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -516,7 +533,10 @@ TEST_F(AutogradTest, DistributedMatmul) { EXPECT_TRUE(result[0]->IsCUDA()); } -TEST_F(AutogradTest, DistributedLinear) { +TEST_F(AutogradDistributedTest, DistributedLinear) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); input->set_requires_grad(true); diff --git a/tests/autograd/test_autograd_elementwise_backward.cc b/tests/autograd/test_autograd_elementwise_backward.cc new file mode 100644 index 00000000..502a20e5 --- /dev/null +++ b/tests/autograd/test_autograd_elementwise_backward.cc @@ -0,0 +1,134 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/elementwise.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradElementwiseBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradElementwiseBackwardTest, AddBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto add_fn = std::make_shared(); + auto result = add_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = add_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradElementwiseBackwardTest, SubBackward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto sub_fn = std::make_shared(); + auto result = sub_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = sub_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradElementwiseBackwardTest, MulBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared(); + auto result = mul_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = mul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradElementwiseBackwardTest, DivBackward) { + auto a = createTensor({2, 3}, 6.0f); + auto b = createTensor({2, 3}, 2.0f); + auto div_fn = std::make_shared(); + auto result = div_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = div_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradElementwiseBackwardTest, NegBackward) { + auto a = createTensor({2, 3}, 5.0f); + auto neg_fn = std::make_shared(); + auto result = neg_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = neg_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, SinBackward) { + auto a = createTensor({2, 3}, 0.0f); + auto sin_fn = std::make_shared(); + auto result = sin_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = sin_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, CosBackward) { + auto a = createTensor({2, 3}, 0.0f); + auto cos_fn = std::make_shared(); + auto result = cos_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = cos_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, TanhBackward) { + auto a = createTensor({2, 3}, 0.0f); + auto tanh_fn = std::make_shared(); + auto result = tanh_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = tanh_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, ExpBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto exp_fn = std::make_shared(); + auto result = exp_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = exp_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, LogBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto log_fn = std::make_shared(); + auto result = log_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = log_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, ReciprocalBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto reciprocal_fn = std::make_shared(); + auto result = reciprocal_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = reciprocal_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, PowBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto pow_fn = std::make_shared(2.0f); + auto result = pow_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = pow_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, RsqrtBackward) { + auto a = createTensor({2, 3}, 4.0f); + auto rsqrt_fn = std::make_shared(); + auto result = rsqrt_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = rsqrt_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} diff --git a/tests/autograd/test_autograd_elementwise_forward.cc b/tests/autograd/test_autograd_elementwise_forward.cc new file mode 100644 index 00000000..63b386b1 --- /dev/null +++ b/tests/autograd/test_autograd_elementwise_forward.cc @@ -0,0 +1,187 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/elementwise.h" +#include "infini_train/include/autograd/activations.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradElementwiseForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradElementwiseForwardTest, AddForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto add_fn = std::make_shared(); + auto result = add_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradElementwiseForwardTest, SubForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto sub_fn = std::make_shared(); + auto result = sub_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, MulForward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared(); + auto result = mul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, DivForward) { + auto a = createTensor({2, 3}, 6.0f); + auto b = createTensor({2, 3}, 2.0f); + auto div_fn = std::make_shared(); + auto result = div_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, NegForward) { + auto a = createTensor({2, 3}, 5.0f); + auto neg_fn = std::make_shared(); + auto result = neg_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, SinForward) { + auto a = createTensor({2, 3}, 0.0f); + auto sin_fn = std::make_shared(); + auto result = sin_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, CosForward) { + auto a = createTensor({2, 3}, 0.0f); + auto cos_fn = std::make_shared(); + auto result = cos_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, TanhForward) { + auto a = createTensor({2, 3}, 0.0f); + auto tanh_fn = std::make_shared(); + auto result = tanh_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, ExpForward) { + auto a = createTensor({2, 3}, 1.0f); + auto exp_fn = std::make_shared(); + auto result = exp_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, LogForward) { + auto a = createTensor({2, 3}, 2.0f); + auto log_fn = std::make_shared(); + auto result = log_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, ReciprocalForward) { + auto a = createTensor({2, 3}, 2.0f); + auto reciprocal_fn = std::make_shared(); + auto result = reciprocal_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, PowForward) { + auto a = createTensor({2, 3}, 2.0f); + auto pow_fn = std::make_shared(2.0f); + auto result = pow_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, RsqrtForward) { + auto a = createTensor({2, 3}, 4.0f); + auto rsqrt_fn = std::make_shared(); + auto result = rsqrt_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, SigmoidForward) { + auto a = createTensor({2, 3}, 0.0f); + auto sigmoid_fn = std::make_shared(); + auto result = sigmoid_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, AddScalarForward) { + auto a = createTensor({2, 3}, 1.0f); + auto add_scalar_fn = std::make_shared(2.0f); + auto result = add_scalar_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, MulScalarForward) { + auto a = createTensor({2, 3}, 2.0f); + auto mul_scalar_fn = std::make_shared(3.0f); + auto result = mul_scalar_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, LtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto lt_fn = std::make_shared(); + auto result = lt_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, LeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto le_fn = std::make_shared(); + auto result = le_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, GtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto gt_fn = std::make_shared(); + auto result = gt_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, GeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto ge_fn = std::make_shared(); + auto result = ge_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, EqualsForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto eq_fn = std::make_shared(); + auto result = eq_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, AndForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 1.0f); + auto and_fn = std::make_shared(); + auto result = and_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, OrForward) { + auto a = createTensor({2, 3}, 0.0f); + auto b = createTensor({2, 3}, 1.0f); + auto or_fn = std::make_shared(); + auto result = or_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} diff --git a/tests/autograd/test_autograd_linear_backward.cc b/tests/autograd/test_autograd_linear_backward.cc new file mode 100644 index 00000000..069affc7 --- /dev/null +++ b/tests/autograd/test_autograd_linear_backward.cc @@ -0,0 +1,33 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/linear.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradLinearBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradLinearBackwardTest, LinearBackward) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + auto grad = createTensor({2, 4}, 1.0f); + auto grad_inputs = linear_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 3); +} + +TEST_F(AutogradLinearBackwardTest, LinearBackwardNoBias) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight}); + auto grad = createTensor({2, 4}, 1.0f); + auto grad_inputs = linear_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} diff --git a/tests/autograd/test_autograd_linear_forward.cc b/tests/autograd/test_autograd_linear_forward.cc new file mode 100644 index 00000000..efd8d6eb --- /dev/null +++ b/tests/autograd/test_autograd_linear_forward.cc @@ -0,0 +1,41 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/linear.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradLinearForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradLinearForwardTest, LinearForward) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradLinearForwardTest, LinearNoBias) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradLinearForwardTest, LinearBatch) { + auto input = createTensor({32, 128}, 1.0f); + auto weight = createTensor({64, 128}, 1.0f); + auto bias = createTensor({64}, 0.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{32, 64})); +} diff --git a/tests/autograd/test_autograd_matmul_backward.cc b/tests/autograd/test_autograd_matmul_backward.cc new file mode 100644 index 00000000..e9962f5d --- /dev/null +++ b/tests/autograd/test_autograd_matmul_backward.cc @@ -0,0 +1,42 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/matmul.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradMatmulBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradMatmulBackwardTest, MatmulBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({3, 4}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + auto grad = createTensor({2, 4}, 1.0f); + auto grad_inputs = matmul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradMatmulBackwardTest, MatmulBackwardSquare) { + auto a = createTensor({3, 3}, 2.0f); + auto b = createTensor({3, 3}, 3.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + auto grad = createTensor({3, 3}, 1.0f); + auto grad_inputs = matmul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradMatmulBackwardTest, MatmulBackwardDifferentShapes) { + auto a = createTensor({3, 4}, 1.5f); + auto b = createTensor({4, 2}, 2.5f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + auto grad = createTensor({3, 2}, 1.0f); + auto grad_inputs = matmul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} diff --git a/tests/autograd/test_autograd_matmul_forward.cc b/tests/autograd/test_autograd_matmul_forward.cc new file mode 100644 index 00000000..87c93f08 --- /dev/null +++ b/tests/autograd/test_autograd_matmul_forward.cc @@ -0,0 +1,48 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/matmul.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradMatmulForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradMatmulForwardTest, MatmulForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({3, 4}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradMatmulForwardTest, MatmulDifferentShapes) { + auto a = createTensor({3, 4}, 1.0f); + auto b = createTensor({4, 2}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{3, 2})); +} + +TEST_F(AutogradMatmulForwardTest, MatmulBatch) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto b = createTensor({2, 4, 5}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3, 5})); +} + +TEST_F(AutogradMatmulForwardTest, MatmulSquare) { + auto a = createTensor({3, 3}, 1.0f); + auto b = createTensor({3, 3}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{3, 3})); +} diff --git a/tests/autograd/test_autograd_normalization_backward.cc b/tests/autograd/test_autograd_normalization_backward.cc new file mode 100644 index 00000000..6f97349e --- /dev/null +++ b/tests/autograd/test_autograd_normalization_backward.cc @@ -0,0 +1,34 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/normalization.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradNormalizationBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradNormalizationBackwardTest, LayerNormBackward) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + auto grad = createTensor({2, 3, 4}, 1.0f); + auto grad_inputs = layernorm_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 3); +} + +TEST_F(AutogradNormalizationBackwardTest, LayerNormBackwardZeroBias) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + auto grad = createTensor({2, 3, 4}, 1.0f); + auto grad_inputs = layernorm_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 3); +} diff --git a/tests/autograd/test_autograd_normalization_forward.cc b/tests/autograd/test_autograd_normalization_forward.cc new file mode 100644 index 00000000..d58fd749 --- /dev/null +++ b/tests/autograd/test_autograd_normalization_forward.cc @@ -0,0 +1,40 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/normalization.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradNormalizationForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradNormalizationForwardTest, LayerNormForward) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradNormalizationForwardTest, LayerNormZeroBias) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradNormalizationForwardTest, LayerNormThreeDim) { + auto a = createTensor({2, 1, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 1, 4})); +} diff --git a/tests/autograd/test_autograd_reduction_backward.cc b/tests/autograd/test_autograd_reduction_backward.cc new file mode 100644 index 00000000..d212a065 --- /dev/null +++ b/tests/autograd/test_autograd_reduction_backward.cc @@ -0,0 +1,66 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/reduction.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradReductionBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradReductionBackwardTest, SumBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared(1, false); + auto result = sum_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = sum_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradReductionBackwardTest, MeanBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared(1, false); + auto result = mean_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = mean_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradReductionBackwardTest, MaxBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto max_fn = std::make_shared(1, false); + auto result = max_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = max_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradReductionBackwardTest, MinBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto min_fn = std::make_shared(1, false); + auto result = min_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = min_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradReductionBackwardTest, SumBackwardKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared(1, true); + auto result = sum_fn->Apply({a}); + auto grad = createTensor({2, 1}, 1.0f); + auto grad_inputs = sum_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradReductionBackwardTest, MeanBackwardKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared(1, true); + auto result = mean_fn->Apply({a}); + auto grad = createTensor({2, 1}, 1.0f); + auto grad_inputs = mean_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} diff --git a/tests/autograd/test_autograd_reduction_forward.cc b/tests/autograd/test_autograd_reduction_forward.cc new file mode 100644 index 00000000..b4f8edb7 --- /dev/null +++ b/tests/autograd/test_autograd_reduction_forward.cc @@ -0,0 +1,54 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/reduction.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradReductionForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradReductionForwardTest, SumForward) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared(1, false); + auto result = sum_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradReductionForwardTest, MeanForward) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared(1, false); + auto result = mean_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradReductionForwardTest, MaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto max_fn = std::make_shared(1, false); + auto result = max_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradReductionForwardTest, MinForward) { + auto a = createTensor({2, 3}, 1.0f); + auto min_fn = std::make_shared(1, false); + auto result = min_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradReductionForwardTest, SumKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared(1, true); + auto result = sum_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradReductionForwardTest, MeanKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared(1, true); + auto result = mean_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} diff --git a/tests/autograd/test_autograd_softmax_backward.cc b/tests/autograd/test_autograd_softmax_backward.cc new file mode 100644 index 00000000..6d3f02a4 --- /dev/null +++ b/tests/autograd/test_autograd_softmax_backward.cc @@ -0,0 +1,30 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/softmax.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradSoftmaxBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradSoftmaxBackwardTest, SoftmaxBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto softmax_fn = std::make_shared(1); + auto result = softmax_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = softmax_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradSoftmaxBackwardTest, SoftmaxBackwardDim0) { + auto a = createTensor({4, 3}, 1.0f); + auto softmax_fn = std::make_shared(0); + auto result = softmax_fn->Apply({a}); + auto grad = createTensor({4, 3}, 1.0f); + auto grad_inputs = softmax_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} diff --git a/tests/autograd/test_autograd_softmax_forward.cc b/tests/autograd/test_autograd_softmax_forward.cc new file mode 100644 index 00000000..c3d196f1 --- /dev/null +++ b/tests/autograd/test_autograd_softmax_forward.cc @@ -0,0 +1,36 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/softmax.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradSoftmaxForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradSoftmaxForwardTest, SoftmaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto softmax_fn = std::make_shared(1); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradSoftmaxForwardTest, SoftmaxDim0) { + auto a = createTensor({4, 3}, 1.0f); + auto softmax_fn = std::make_shared(0); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{4, 3})); +} + +TEST_F(AutogradSoftmaxForwardTest, SoftmaxLastDim) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto softmax_fn = std::make_shared(2); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3, 4})); +} diff --git a/tests/autograd/test_autograd_transform_backward.cc b/tests/autograd/test_autograd_transform_backward.cc new file mode 100644 index 00000000..1613f1a2 --- /dev/null +++ b/tests/autograd/test_autograd_transform_backward.cc @@ -0,0 +1,21 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/transform.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradTransformBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradTransformBackwardTest, TransposeBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto transpose_fn = std::make_shared(0, 1); + auto result = transpose_fn->Apply({a}); + auto grad = createTensor({3, 2}, 1.0f); + auto grad_inputs = transpose_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} diff --git a/tests/autograd/test_autograd_transform_forward.cc b/tests/autograd/test_autograd_transform_forward.cc new file mode 100644 index 00000000..67b20adb --- /dev/null +++ b/tests/autograd/test_autograd_transform_forward.cc @@ -0,0 +1,70 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/transform.h" +#include "infini_train/include/autograd/misc.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradTransformForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradTransformForwardTest, TransposeForward) { + auto a = createTensor({2, 3}, 1.0f); + auto transpose_fn = std::make_shared(0, 1); + auto result = transpose_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{3, 2})); +} + +TEST_F(AutogradTransformForwardTest, SliceForward) { + auto a = createTensor({4, 4}, 1.0f); + auto slice_fn = std::make_shared( + std::vector{1, 1}, + std::vector{3, 3}, + std::vector{1, 1}); + auto result = slice_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTransformForwardTest, SplitForward) { + auto a = createTensor({4, 4}, 1.0f); + auto split_fn = std::make_shared(2, 0); + auto result = split_fn->Apply({a}); + EXPECT_EQ(result.size(), 2); +} + +TEST_F(AutogradTransformForwardTest, ConcatForward) { + auto a = createTensor({2, 2}, 1.0f); + auto b = createTensor({2, 2}, 2.0f); + auto concat_fn = std::make_shared(0); + auto result = concat_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{4, 2})); +} + +TEST_F(AutogradTransformForwardTest, StackForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto stack_fn = std::make_shared(0); + auto result = stack_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 2, 3})); +} + +TEST_F(AutogradTransformForwardTest, TrilForward) { + auto a = createTensor({3, 3}, 1.0f); + auto tril_fn = std::make_shared(0); + auto result = tril_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTransformForwardTest, TriuForward) { + auto a = createTensor({3, 3}, 1.0f); + auto triu_fn = std::make_shared(0); + auto result = triu_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} diff --git a/tests/common/test_macros.cmake b/tests/common/test_macros.cmake new file mode 100644 index 00000000..184083c0 --- /dev/null +++ b/tests/common/test_macros.cmake @@ -0,0 +1,97 @@ +# ============================================================================ +# InfiniTrain 测试宏 +# ============================================================================ +# 提供统一的测试配置接口,降低接入成本 +# +# 使用方法: +# 1. 在 tests/CMakeLists.txt 中 include 此文件 +# 2. 使用 infini_train_add_test 宏注册测试 +# +# 示例: +# infini_train_add_test( +# test_tensor_create +# SOURCES test_tensor_create.cc +# LABELS cpu cuda +# ) +# ============================================================================ + +include_guard(GLOBAL) + +# 获取 test_macros.cmake 所在目录(tests/common/) +set(TEST_MACROS_DIR "${CMAKE_CURRENT_LIST_DIR}") + +# ----------------------------------------------------------------------------- +# 加载 GoogleTest 模块(提供 gtest_discover_tests) +# ----------------------------------------------------------------------------- +include(GoogleTest) + +# ----------------------------------------------------------------------------- +# infini_train_add_test - 测试注册宏 +# ----------------------------------------------------------------------------- +# 功能: +# 1. 创建可执行文件 +# 2. 配置编译选项、链接库和头文件路径 +# 3. 使用 gtest_discover_tests 自动发现测试用例 +# 4. 设置测试标签 +# +# 参数: +# SOURCES: 源文件列表(必填) +# LABELS: 测试标签,如 "cpu" "cuda" "distributed"(可选,默认 "cpu") +# +# 示例: +# # 简单测试(1行) +# infini_train_add_test(test_example SOURCES test_example.cc LABELS cpu) +# +# # 多标签测试 +# infini_train_add_test(test_cuda_example SOURCES test_cuda.cc LABELS cuda distributed) +# ----------------------------------------------------------------------------- +macro(infini_train_add_test) + cmake_parse_arguments(ARG "" "TEST_NAME" "SOURCES;LABELS" ${ARGN}) + + if(NOT ARG_TEST_NAME) + set(ARG_TEST_NAME ${ARG_UNPARSED_ARGUMENTS}) + endif() + + if(NOT ARG_SOURCES) + message(FATAL_ERROR "infini_train_add_test: TEST_NAME and SOURCES are required") + endif() + + # 1. 创建可执行文件 + add_executable(${ARG_TEST_NAME} ${ARG_SOURCES}) + + # 2. 配置编译选项(禁用警告转错误,以便在宽松编译环境下运行) + target_compile_options(${ARG_TEST_NAME} PRIVATE -Wno-error) + + # 3. 链接 Google Test + target_link_libraries(${ARG_TEST_NAME} PRIVATE + GTest::gtest + GTest::gtest_main + ) + + # 4. 添加头文件路径 + target_include_directories(${ARG_TEST_NAME} PRIVATE + ${TEST_MACROS_DIR} + ${glog_SOURCE_DIR}/src + ) + + # 5. 链接项目库(whole-archive 方式解决静态库符号依赖) + target_link_libraries(${ARG_TEST_NAME} PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" + ) + + # 6. 使用 gtest_discover_tests 自动发现测试用例 + # 这会自动为每个 TEST_F() 创建一个 ctest 测试 + set(labels "cpu") + if(ARG_LABELS) + set(labels "${ARG_LABELS}") + endif() + + gtest_discover_tests(${ARG_TEST_NAME} + # 自动将测试输出重定向到 XML(便于 CI 集成) + EXTRA_ARGS --gtest_output=xml:%T.xml + PROPERTIES LABELS "${labels}" + ) +endmacro() diff --git a/tests/hook/CMakeLists.txt b/tests/hook/CMakeLists.txt index e567608b..51c77ae0 100644 --- a/tests/hook/CMakeLists.txt +++ b/tests/hook/CMakeLists.txt @@ -1,47 +1,14 @@ +# ============================================================================ # Hook tests - -add_executable(test_hook - test_hook.cc -) -target_link_libraries(test_hook - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_hook PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) - -target_link_libraries(test_hook PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) - -add_test(NAME hook_cpu COMMAND test_hook) -set_tests_properties(hook_cpu PROPERTIES LABELS "cpu") - -add_test(NAME hook_cuda COMMAND test_hook --gtest_filter=HookTest.*CUDA) -set_tests_properties(hook_cuda PROPERTIES LABELS "cuda") - -add_test(NAME hook_distributed COMMAND test_hook --gtest_filter=HookTest.*Distributed) -set_tests_properties(hook_distributed PROPERTIES LABELS "cuda;distributed") - -add_executable(test_precision_check - test_precision_check.cc -) -target_link_libraries(test_precision_check - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_precision_check PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) - -target_link_libraries(test_precision_check PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) - -add_test(NAME precision_check_cpu COMMAND test_precision_check) -set_tests_properties(precision_check_cpu PROPERTIES LABELS "cpu") +# ============================================================================ +# 重构版本:使用 infini_train_add_test 宏简化配置 +# +# 新增测试只需 1 行: +# infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu) +# ============================================================================ + +# ----------------------------------------------------------------------------- +# Hook tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_hook SOURCES test_hook.cc LABELS cpu cuda distributed) +infini_train_add_test(test_precision_check SOURCES test_precision_check.cc LABELS cpu) diff --git a/tests/lora/CMakeLists.txt b/tests/lora/CMakeLists.txt new file mode 100644 index 00000000..27b47c64 --- /dev/null +++ b/tests/lora/CMakeLists.txt @@ -0,0 +1,7 @@ +# ============================================================================ +# LoRA tests +# ============================================================================ +# 重构版本:使用 infini_train_add_test 宏简化配置 +# ============================================================================ + +infini_train_add_test(test_lora SOURCES test_lora.cc LABELS cpu) diff --git a/tests/lora/test_lora.cc b/tests/lora/test_lora.cc new file mode 100644 index 00000000..19c24efd --- /dev/null +++ b/tests/lora/test_lora.cc @@ -0,0 +1,331 @@ +#include + +#include +#include +#include + +#include "infini_train/include/nn/lora/lora_config.h" +#include "infini_train/include/nn/lora/lora_linear.h" +#include "infini_train/include/nn/lora/lora_utils.h" +#include "infini_train/include/nn/modules/container.h" +#include "infini_train/include/nn/modules/linear.h" +#include "infini_train/include/nn/modules/module.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/tensor.h" +#include "test_utils.h" + +using namespace infini_train; +using namespace infini_train::nn::lora; + +class LoRATest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(LoRATest, LoRAConfigScaling) { + LoRAConfig config; + config.rank = 8; + config.alpha = 16.0f; + + float expected_scaling = 16.0f / 8.0f; + EXPECT_EQ(config.Scaling(), expected_scaling); +} + +TEST_F(LoRATest, LoRAConfigShouldApply) { + LoRAConfig config; + config.rank = 8; + config.alpha = 16.0f; + + EXPECT_TRUE(config.ShouldApplyLoRA("c_attn")); + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.attn.c_attn")); + EXPECT_TRUE(config.ShouldApplyLoRA("c_proj")); + EXPECT_FALSE(config.ShouldApplyLoRA("c_fc")); + EXPECT_FALSE(config.ShouldApplyLoRA("random_layer")); +} + +TEST_F(LoRATest, LoRALinearFromModel) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto *lora_linear = dynamic_cast(model.get()); + ASSERT_NE(lora_linear, nullptr); + + EXPECT_EQ(lora_linear->in_features(), 64); + EXPECT_EQ(lora_linear->out_features(), 128); + EXPECT_EQ(lora_linear->rank(), 4); + + auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName); + auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName); + auto weight = lora_linear->parameter(nn::Linear::kParamWeightName); + + EXPECT_EQ(lora_A->Dims()[0], config.rank); + EXPECT_EQ(lora_A->Dims()[1], 64); + EXPECT_EQ(lora_B->Dims()[0], 128); + EXPECT_EQ(lora_B->Dims()[1], config.rank); + + EXPECT_FALSE(weight->requires_grad()); + EXPECT_TRUE(lora_A->requires_grad()); + EXPECT_TRUE(lora_B->requires_grad()); + + auto params = lora_linear->LoRAParameters(); + EXPECT_EQ(params.size(), 2); +} + +TEST_F(LoRATest, LoRALinearForward) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto input = std::make_shared(std::vector{2, 10, 64}, DataType::kFLOAT32); + + auto output = (*model)({input})[0]; + + EXPECT_EQ(output->Dims().size(), 3); + EXPECT_EQ(output->Dims()[0], 2); + EXPECT_EQ(output->Dims()[1], 10); + EXPECT_EQ(output->Dims()[2], 128); +} + +TEST_F(LoRATest, LoRALinearMerge) { + auto base_linear = std::make_shared(32, 64, /*bias=*/false); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto *lora_linear = dynamic_cast(model.get()); + ASSERT_NE(lora_linear, nullptr); + + auto input = std::make_shared(std::vector{2, 5, 32}, DataType::kFLOAT32); + input->EigenMatrix().setRandom(); + + auto output_before = (*model)({input})[0]; + float output_before_sum = output_before->EigenMatrix().sum(); + + EXPECT_FALSE(lora_linear->IsMerged()); + MergeLoRAWeights(model); + EXPECT_TRUE(lora_linear->IsMerged()); + + auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName); + auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName); + EXPECT_FALSE(lora_A->requires_grad()); + EXPECT_FALSE(lora_B->requires_grad()); + + auto output_merged = (*model)({input})[0]; + float output_merged_sum = output_merged->EigenMatrix().sum(); + EXPECT_NEAR(std::abs(output_before_sum - output_merged_sum), 0.0f, 1e-3); + + UnmergeLoRAWeights(model); + EXPECT_FALSE(lora_linear->IsMerged()); + EXPECT_TRUE(lora_A->requires_grad()); + EXPECT_TRUE(lora_B->requires_grad()); + + auto output_unmerged = (*model)({input})[0]; + EXPECT_EQ(output_before->Dims(), output_unmerged->Dims()); +} + +TEST_F(LoRATest, LoRAUtils) { + auto base_linear = std::make_shared(32, 64, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto lora_params = GetLoRAParameters(model); + EXPECT_EQ(lora_params.size(), 2); + + int64_t trainable = CountTrainableParameters(model); + int64_t expected_trainable = config.rank * 32 + 64 * config.rank; + EXPECT_EQ(trainable, expected_trainable); + + int64_t total = CountTotalParameters(model); + int64_t expected_total = 64 * 32 + 64 + config.rank * 32 + 64 * config.rank; + EXPECT_EQ(total, expected_total); +} + +TEST_F(LoRATest, ParseLoRATargetModules) { + auto modules = ParseLoRATargetModules("c_attn"); + EXPECT_EQ(modules.size(), 1); + EXPECT_TRUE(modules.count("c_attn")); + + modules = ParseLoRATargetModules("c_attn,c_proj,c_fc"); + EXPECT_EQ(modules.size(), 3); + EXPECT_TRUE(modules.count("c_attn")); + EXPECT_TRUE(modules.count("c_proj")); + EXPECT_TRUE(modules.count("c_fc")); + + modules = ParseLoRATargetModules("c_attn, c_proj , c_fc"); + EXPECT_EQ(modules.size(), 3); + + modules = ParseLoRATargetModules("c_attn,,c_proj"); + EXPECT_EQ(modules.size(), 2); +} + +TEST_F(LoRATest, ShouldApplyLoRAEdgeCases) { + { + LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,attn.c_proj")}; + EXPECT_TRUE(config.ShouldApplyLoRA("attn.c_proj")); + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj")); + EXPECT_FALSE(config.ShouldApplyLoRA("mlp.c_proj")); + } + + { + LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,mlp.c_proj")}; + EXPECT_FALSE(config.ShouldApplyLoRA("attn.c_proj")); + EXPECT_TRUE(config.ShouldApplyLoRA("mlp.c_proj")); + } + + { + LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,c_proj")}; + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj")); + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.mlp.c_proj")); + } +} + +TEST_F(LoRATest, FreezeUnfreeze) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto all_params = model->Parameters(); + + int64_t total_trainable = 0; + for (const auto &p : all_params) { + if (p->requires_grad()) { + total_trainable += p->NumElements(); + } + } + int64_t expected = config.rank * 64 + 128 * config.rank; + EXPECT_EQ(total_trainable, expected); + + FreezeBaseModel(model); + + int64_t after_freeze = 0; + for (const auto &p : all_params) { + if (p->requires_grad()) { + after_freeze += p->NumElements(); + } + } + EXPECT_EQ(after_freeze, expected); + + UnfreezeModel(model); + int64_t after_unfreeze = 0; + for (const auto &p : all_params) { + if (p->requires_grad()) { + after_unfreeze += p->NumElements(); + } + } + int64_t expected_unfreeze = 64 * 128 + 128 + config.rank * 64 + 128 * config.rank; + EXPECT_EQ(after_unfreeze, expected_unfreeze); +} + +TEST_F(LoRATest, LoRAStateDict) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto state_dict = model->StateDict(); + + EXPECT_TRUE(state_dict.count("weight")); + EXPECT_TRUE(state_dict.count("bias")); + EXPECT_TRUE(state_dict.count("lora_A")); + EXPECT_TRUE(state_dict.count("lora_B")); + + EXPECT_TRUE(state_dict.at("lora_A")->requires_grad()); + EXPECT_TRUE(state_dict.at("lora_B")->requires_grad()); + EXPECT_FALSE(state_dict.at("weight")->requires_grad()); + + EXPECT_EQ(state_dict.at("lora_A")->Dims()[0], config.rank); + EXPECT_EQ(state_dict.at("lora_A")->Dims()[1], 64); + EXPECT_EQ(state_dict.at("lora_B")->Dims()[0], 128); + EXPECT_EQ(state_dict.at("lora_B")->Dims()[1], config.rank); +} + +TEST_F(LoRATest, GetLoRAModel) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + EXPECT_NE(model, nullptr); + + auto lora_params = GetLoRAParameters(model); + EXPECT_EQ(lora_params.size(), 2); + + int64_t total_elements = 0; + for (const auto &t : lora_params) { + total_elements += t->NumElements(); + } + int64_t expected_elements = config.rank * 64 + 128 * config.rank; + EXPECT_EQ(total_elements, expected_elements); + + MergeLoRAWeights(model); + auto *lora_mod = dynamic_cast(model.get()); + EXPECT_NE(lora_mod, nullptr); + EXPECT_FALSE(lora_mod->LoRAParameters()[0]->requires_grad()); + + UnmergeLoRAWeights(model); + EXPECT_TRUE(lora_mod->LoRAParameters()[0]->requires_grad()); +} + +TEST_F(LoRATest, MergeAndUnload) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + auto model = GetLoRAModel(base_linear, config); + + EXPECT_NE(dynamic_cast(model.get()), nullptr); + + auto input = std::make_shared(std::vector{2, 5, 64}, DataType::kFLOAT32); + input->EigenMatrix().setRandom(); + auto output_before = (*model)({input})[0]; + float output_before_sum = output_before->EigenMatrix().sum(); + + auto unloaded_model = MergeAndUnload(model); + EXPECT_NE(unloaded_model, nullptr); + EXPECT_EQ(dynamic_cast(unloaded_model.get()), nullptr); + + auto state_dict = unloaded_model->StateDict(); + for (const auto &[name, param] : state_dict) { + EXPECT_EQ(name.find("lora_A"), std::string::npos); + EXPECT_EQ(name.find("lora_B"), std::string::npos); + } + + auto output_after = (*unloaded_model)({input})[0]; + float output_after_sum = output_after->EigenMatrix().sum(); + EXPECT_NEAR(std::abs(output_before_sum - output_after_sum), 0.0f, 1e-3); + + for (const auto ¶m : unloaded_model->Parameters()) { + EXPECT_TRUE(param->requires_grad()); + } +} diff --git a/tests/optimizer/CMakeLists.txt b/tests/optimizer/CMakeLists.txt index c17d1a8d..3dc00354 100644 --- a/tests/optimizer/CMakeLists.txt +++ b/tests/optimizer/CMakeLists.txt @@ -1,27 +1,22 @@ +# ============================================================================ # Optimizer tests +# ============================================================================ +# 重构版本:使用 infini_train_add_test 宏简化配置 +# +# 新增测试只需 1 行: +# infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu) +# ============================================================================ -add_executable(test_optimizer - test_optimizer.cc -) -target_link_libraries(test_optimizer - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_optimizer PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) - -target_link_libraries(test_optimizer PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) - -add_test(NAME optimizer_cpu COMMAND test_optimizer) -set_tests_properties(optimizer_cpu PROPERTIES LABELS "cpu") - -add_test(NAME optimizer_cuda COMMAND test_optimizer --gtest_filter=OptimizerTest.*CUDA) -set_tests_properties(optimizer_cuda PROPERTIES LABELS "cuda") +# ----------------------------------------------------------------------------- +# Optimizer tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_optimizer_creation SOURCES test_optimizer_creation.cc LABELS cpu) +infini_train_add_test(test_optimizer_step SOURCES test_optimizer_step.cc LABELS cpu) +infini_train_add_test(test_optimizer_cuda SOURCES test_optimizer_cuda.cc LABELS cuda) +infini_train_add_test(test_optimizer_distributed SOURCES test_optimizer_distributed.cc LABELS cuda distributed) -add_test(NAME optimizer_distributed COMMAND test_optimizer --gtest_filter=OptimizerTest.*Distributed) -set_tests_properties(optimizer_distributed PROPERTIES LABELS "cuda;distributed") +# ----------------------------------------------------------------------------- +# Legacy combined tests +# 注意:使用 gtest_discover_tests,所有 TEST_F 都会被自动发现 +# ----------------------------------------------------------------------------- +infini_train_add_test(test_optimizer_legacy SOURCES test_optimizer.cc LABELS cpu cuda distributed) diff --git a/tests/optimizer/test_optimizer.cc b/tests/optimizer/test_optimizer.cc index 836fee91..6baa34e5 100644 --- a/tests/optimizer/test_optimizer.cc +++ b/tests/optimizer/test_optimizer.cc @@ -9,14 +9,19 @@ using namespace infini_train; -class OptimizerTest : public ::testing::Test { +class OptimizerTestBase : public ::testing::Test { protected: static void SetUpTestSuite() { nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); } }; -TEST_F(OptimizerTest, SGDCreation) { +class OptimizerCreationTest : public OptimizerTestBase {}; +class OptimizerGradTest : public OptimizerTestBase {}; +class OptimizerCudaTest : public OptimizerTestBase {}; +class OptimizerDistributedTest : public OptimizerTestBase {}; + +TEST_F(OptimizerCreationTest, SGDCreation) { auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); param->set_requires_grad(true); @@ -27,7 +32,7 @@ TEST_F(OptimizerTest, SGDCreation) { EXPECT_NE(optimizer, nullptr); } -TEST_F(OptimizerTest, AdamCreation) { +TEST_F(OptimizerCreationTest, AdamCreation) { auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); param->set_requires_grad(true); @@ -38,7 +43,7 @@ TEST_F(OptimizerTest, AdamCreation) { EXPECT_NE(optimizer, nullptr); } -TEST_F(OptimizerTest, ZeroGrad) { +TEST_F(OptimizerGradTest, ZeroGrad) { auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); param->set_requires_grad(true); @@ -49,7 +54,7 @@ TEST_F(OptimizerTest, ZeroGrad) { optimizer->ZeroGrad(); } -TEST_F(OptimizerTest, SGDMultiParams) { +TEST_F(OptimizerCreationTest, SGDMultiParams) { std::vector> params; for (int i = 0; i < 3; ++i) { auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -64,7 +69,7 @@ TEST_F(OptimizerTest, SGDMultiParams) { optimizer->ZeroGrad(); } -TEST_F(OptimizerTest, SGDCreationCUDA) { +TEST_F(OptimizerCudaTest, SGDCreationCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -79,7 +84,7 @@ TEST_F(OptimizerTest, SGDCreationCUDA) { #endif } -TEST_F(OptimizerTest, AdamCreationCUDA) { +TEST_F(OptimizerCudaTest, AdamCreationCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -94,7 +99,7 @@ TEST_F(OptimizerTest, AdamCreationCUDA) { #endif } -TEST_F(OptimizerTest, ZeroGradCUDA) { +TEST_F(OptimizerCudaTest, ZeroGradCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -109,7 +114,7 @@ TEST_F(OptimizerTest, ZeroGradCUDA) { #endif } -TEST_F(OptimizerTest, SGDMultiParamsCUDA) { +TEST_F(OptimizerCudaTest, SGDMultiParamsCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) std::vector> params; @@ -127,8 +132,10 @@ TEST_F(OptimizerTest, SGDMultiParamsCUDA) { #endif } -TEST_F(OptimizerTest, DistributedSGD) { +TEST_F(OptimizerDistributedTest, DistributedSGD) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); @@ -142,8 +149,10 @@ TEST_F(OptimizerTest, DistributedSGD) { #endif } -TEST_F(OptimizerTest, DistributedAdam) { +TEST_F(OptimizerDistributedTest, DistributedAdam) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto param = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); @@ -157,8 +166,10 @@ TEST_F(OptimizerTest, DistributedAdam) { #endif } -TEST_F(OptimizerTest, DistributedZeroGrad) { +TEST_F(OptimizerDistributedTest, DistributedZeroGrad) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); diff --git a/tests/optimizer/test_optimizer_creation.cc b/tests/optimizer/test_optimizer_creation.cc new file mode 100644 index 00000000..fdb69d70 --- /dev/null +++ b/tests/optimizer/test_optimizer_creation.cc @@ -0,0 +1,82 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerCreationTest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerCreationTest, SGDCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, AdamCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, SGDWithMomentum) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, SGDMultiParams) { + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, AdamMultiParams) { + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.001); + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, SGDWithWeightDecay) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} diff --git a/tests/optimizer/test_optimizer_cuda.cc b/tests/optimizer/test_optimizer_cuda.cc new file mode 100644 index 00000000..b56ace2e --- /dev/null +++ b/tests/optimizer/test_optimizer_cuda.cc @@ -0,0 +1,93 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerCUDATest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerCUDATest, SGDCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCUDATest, AdamCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCUDATest, ZeroGradCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCUDATest, SGDMultiParamsCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); +#endif +} + +TEST_F(OptimizerCUDATest, AdamStepCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + auto* data = static_cast(param->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + optimizer->ZeroGrad(); + optimizer->Step(); + EXPECT_TRUE(param->IsCUDA()); +#endif +} diff --git a/tests/optimizer/test_optimizer_distributed.cc b/tests/optimizer/test_optimizer_distributed.cc new file mode 100644 index 00000000..f4f092bc --- /dev/null +++ b/tests/optimizer/test_optimizer_distributed.cc @@ -0,0 +1,75 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerDistributedTest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerDistributedTest, DistributedSGD) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedAdam) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedZeroGrad) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedMultiParams) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + std::vector> params; + for (int i = 0; i < 2; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); + optimizer->Step(); +#endif +} diff --git a/tests/optimizer/test_optimizer_step.cc b/tests/optimizer/test_optimizer_step.cc new file mode 100644 index 00000000..d5407998 --- /dev/null +++ b/tests/optimizer/test_optimizer_step.cc @@ -0,0 +1,62 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerStepTest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerStepTest, SGDStep) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + auto* data = static_cast(param->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); + optimizer->Step(); +} + +TEST_F(OptimizerStepTest, AdamStep) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + auto* data = static_cast(param->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + optimizer->ZeroGrad(); + optimizer->Step(); +} + +TEST_F(OptimizerStepTest, ZeroGrad) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); +} + +TEST_F(OptimizerStepTest, ZeroGradWithNone) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(false); +} diff --git a/tests/slow/CMakeLists.txt b/tests/slow/CMakeLists.txt index 22ef9c5e..dd76f187 100644 --- a/tests/slow/CMakeLists.txt +++ b/tests/slow/CMakeLists.txt @@ -1,27 +1,13 @@ -# Slow label smoke tests - -add_executable(test_slow - test_slow.cc -) -target_link_libraries(test_slow - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_slow PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) - -target_link_libraries(test_slow PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) - -add_test(NAME slow_cpu COMMAND test_slow --gtest_filter=SlowTest.Cpu) -set_tests_properties(slow_cpu PROPERTIES LABELS "slow;cpu") - -add_test(NAME slow_cuda COMMAND test_slow --gtest_filter=SlowTest.Cuda) -set_tests_properties(slow_cuda PROPERTIES LABELS "slow;cuda") - -add_test(NAME slow_distributed COMMAND test_slow --gtest_filter=SlowTest.Distributed) -set_tests_properties(slow_distributed PROPERTIES LABELS "slow;cuda;distributed") +# ============================================================================ +# Slow tests +# ============================================================================ +# 重构版本:使用 infini_train_add_test 宏简化配置 +# +# 新增测试只需 1 行: +# infini_train_add_test(test_name SOURCES test_name.cc LABELS slow cpu) +# ============================================================================ + +# ----------------------------------------------------------------------------- +# Slow tests (标签包含 slow) +# ----------------------------------------------------------------------------- +infini_train_add_test(test_slow SOURCES test_slow.cc LABELS slow cpu cuda distributed) diff --git a/tests/tensor/CMakeLists.txt b/tests/tensor/CMakeLists.txt index bd074f6f..3df1708e 100644 --- a/tests/tensor/CMakeLists.txt +++ b/tests/tensor/CMakeLists.txt @@ -1,31 +1,126 @@ # Tensor tests -add_executable(test_tensor - test_tensor.cc +# test_tensor_create +add_executable(test_tensor_create + test_tensor_create.cc ) -target_compile_options(test_tensor PRIVATE -Wno-error) -target_link_libraries(test_tensor +target_compile_options(test_tensor_create PRIVATE -Wno-error) +target_link_libraries(test_tensor_create PRIVATE GTest::gtest GTest::gtest_main ) -target_include_directories(test_tensor PRIVATE +target_include_directories(test_tensor_create PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ${glog_SOURCE_DIR}/src ) +target_link_libraries(test_tensor_create PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) +add_test(NAME tensor_create COMMAND test_tensor_create) +set_tests_properties(tensor_create PROPERTIES LABELS "cpu;cuda") -target_link_libraries(test_tensor PRIVATE +# test_tensor_copy +add_executable(test_tensor_copy + test_tensor_copy.cc +) +target_compile_options(test_tensor_copy PRIVATE -Wno-error) +target_link_libraries(test_tensor_copy + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor_copy PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) +target_link_libraries(test_tensor_copy PRIVATE "-Wl,--whole-archive" infini_train infini_train_cpu_kernels "-Wl,--no-whole-archive" ) +add_test(NAME tensor_copy COMMAND test_tensor_copy) +set_tests_properties(tensor_copy PROPERTIES LABELS "cpu;cuda") -add_test(NAME tensor_cpu COMMAND test_tensor) -set_tests_properties(tensor_cpu PROPERTIES LABELS "cpu") +# test_tensor_delete +add_executable(test_tensor_delete + test_tensor_delete.cc +) +target_compile_options(test_tensor_delete PRIVATE -Wno-error) +target_link_libraries(test_tensor_delete + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor_delete PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) +target_link_libraries(test_tensor_delete PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) +add_test(NAME tensor_delete COMMAND test_tensor_delete) +set_tests_properties(tensor_delete PROPERTIES LABELS "cpu") -add_test(NAME tensor_cuda COMMAND test_tensor --gtest_filter=TensorTest.*CUDA) -set_tests_properties(tensor_cuda PROPERTIES LABELS "cuda") +# test_tensor_op (keep original for backward compatibility) +add_executable(test_tensor_op + test_tensor.cc +) +target_compile_options(test_tensor_op PRIVATE -Wno-error) +target_link_libraries(test_tensor_op + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor_op PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) +target_link_libraries(test_tensor_op PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) +add_test(NAME tensor_ops COMMAND test_tensor_op --gtest_filter=TensorOpTest.*) +set_tests_properties(tensor_ops PROPERTIES LABELS "cpu;cuda") -add_test(NAME tensor_distributed COMMAND test_tensor --gtest_filter=TensorTest.*Distributed) +# test_tensor_distributed (keep original for backward compatibility) +add_executable(test_tensor_dist + test_tensor.cc +) +target_compile_options(test_tensor_dist PRIVATE -Wno-error) +target_link_libraries(test_tensor_dist + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor_dist PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) +target_link_libraries(test_tensor_dist PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) +add_test(NAME tensor_distributed COMMAND test_tensor_dist --gtest_filter=TensorDistributedTest.*) set_tests_properties(tensor_distributed PROPERTIES LABELS "cuda;distributed") + +# Convenience aggregate target so `cmake --build ... --target test_tensor` works +add_custom_target(test_tensor + DEPENDS + test_tensor_create + test_tensor_copy + test_tensor_delete + test_tensor_op + test_tensor_dist +) diff --git a/tests/tensor/test_tensor.cc b/tests/tensor/test_tensor.cc index b2c40a57..23eb940c 100644 --- a/tests/tensor/test_tensor.cc +++ b/tests/tensor/test_tensor.cc @@ -1,6 +1,7 @@ #include #include +#include #include "infini_train/include/tensor.h" #include "infini_train/include/nn/parallel/global.h" @@ -8,14 +9,36 @@ using namespace infini_train; -class TensorTest : public ::testing::Test { +class TensorTestBase : public ::testing::Test { protected: static void SetUpTestSuite() { nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); } + + static size_t Numel(const std::shared_ptr& tensor) { + size_t n = 1; + for (auto dim : tensor->Dims()) { + n *= static_cast(dim); + } + return n; + } + + static void FillSequential(const std::shared_ptr& tensor, float start = 0.0f) { + auto* data = static_cast(tensor->DataPtr()); + auto n = Numel(tensor); + for (size_t i = 0; i < n; ++i) { + data[i] = start + static_cast(i); + } + } }; -TEST_F(TensorTest, CreateAndDestroy) { +class TensorCreateTest : public TensorTestBase {}; +class TensorCopyTest : public TensorTestBase {}; +class TensorDeleteTest : public TensorTestBase {}; +class TensorOpTest : public TensorTestBase {}; +class TensorDistributedTest : public TensorTestBase {}; + +TEST_F(TensorCreateTest, CreatesCpuTensorWithShapeAndType) { auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); EXPECT_NE(tensor, nullptr); @@ -23,7 +46,7 @@ TEST_F(TensorTest, CreateAndDestroy) { EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); } -TEST_F(TensorTest, RequiresGrad) { +TEST_F(TensorCreateTest, TracksRequiresGrad) { auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); EXPECT_FALSE(tensor->requires_grad()); @@ -31,13 +54,13 @@ TEST_F(TensorTest, RequiresGrad) { EXPECT_TRUE(tensor->requires_grad()); } -TEST_F(TensorTest, DataPointer) { +TEST_F(TensorCreateTest, ProvidesDataPointer) { auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); EXPECT_NE(tensor->DataPtr(), nullptr); } -TEST_F(TensorTest, DifferentShapes) { +TEST_F(TensorCreateTest, SupportsMultipleShapes) { std::vector> shapes = { {2, 3}, {4, 5, 6}, @@ -52,7 +75,7 @@ TEST_F(TensorTest, DifferentShapes) { } } -TEST_F(TensorTest, DifferentDataTypes) { +TEST_F(TensorCreateTest, SupportsMultipleDtypes) { std::vector dtypes = { DataType::kFLOAT32, DataType::kBFLOAT16, @@ -65,19 +88,19 @@ TEST_F(TensorTest, DifferentDataTypes) { } } -TEST_F(TensorTest, CreateCUDA) { +TEST_F(TensorCreateTest, CreatesTensorOnCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); EXPECT_NE(tensor, nullptr); + EXPECT_TRUE(tensor->IsCUDA()); EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); - EXPECT_TRUE(tensor->IsCUDA()); #endif } -TEST_F(TensorTest, RequiresGradCUDA) { +TEST_F(TensorCreateTest, TracksRequiresGradOnCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -88,7 +111,7 @@ TEST_F(TensorTest, RequiresGradCUDA) { #endif } -TEST_F(TensorTest, DataPointerCUDA) { +TEST_F(TensorCreateTest, ProvidesDataPointerOnCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -97,24 +120,52 @@ TEST_F(TensorTest, DataPointerCUDA) { #endif } -TEST_F(TensorTest, TensorCopyCUDA) { +TEST_F(TensorCopyTest, CopiesCPUToCPU) { + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(source, 1.0f); + + target->CopyFrom(source); + + auto* target_data = static_cast(target->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(target_data[i], 1.0f + static_cast(i)); + } +} + +TEST_F(TensorCopyTest, CopiesCPUToCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto cpu_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); auto cuda_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); - - auto* cpu_data = static_cast(cpu_tensor->DataPtr()); - for (int i = 0; i < 6; ++i) cpu_data[i] = static_cast(i); - - cuda_tensor->CopyDataFrom(cpu_tensor.get()); - + Device(Device::DeviceType::kCUDA, 0)); + + FillSequential(cpu_tensor, 0.0f); + cuda_tensor->CopyFrom(cpu_tensor); + EXPECT_TRUE(cuda_tensor->IsCUDA()); #endif } -TEST_F(TensorTest, MatmulCUDA) { +TEST_F(TensorCopyTest, CopiesCUDAtoCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + FillSequential(source, 2.0f); + + target->CopyFrom(source); + + EXPECT_TRUE(target->IsCUDA()); +#endif +} + +TEST_F(TensorOpTest, MatmulCUDAAllocatesOutputs) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -132,40 +183,72 @@ TEST_F(TensorTest, MatmulCUDA) { #endif } -TEST_F(TensorTest, DistributedAllReduce) { +TEST_F(TensorDeleteTest, ReleasesResourcesOnReset) { + std::weak_ptr weak_tensor; + { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + tensor->set_requires_grad(true); + weak_tensor = tensor; + } + EXPECT_TRUE(weak_tensor.expired()); +} + +TEST_F(TensorDeleteTest, MoveTransferKeepsData) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(tensor, 5.0f); + + auto moved = std::move(tensor); + EXPECT_EQ(tensor, nullptr); + ASSERT_NE(moved, nullptr); + + auto* data = static_cast(moved->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(data[i], 5.0f + static_cast(i)); + } +} + +TEST_F(TensorDistributedTest, AllReduce) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); tensor->set_requires_grad(true); - + auto* data = static_cast(tensor->DataPtr()); for (int i = 0; i < 6; ++i) data[i] = 1.0f; - + EXPECT_TRUE(tensor->IsCUDA()); EXPECT_TRUE(tensor->requires_grad()); #endif } -TEST_F(TensorTest, DistributedAllGather) { +TEST_F(TensorDistributedTest, AllGather) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto tensor = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); tensor->set_requires_grad(true); - + EXPECT_TRUE(tensor->IsCUDA()); EXPECT_EQ(tensor->Dims(), (std::vector{4, 4})); #endif } -TEST_F(TensorTest, DistributedReduceScatter) { +TEST_F(TensorDistributedTest, ReduceScatter) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto tensor = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); tensor->set_requires_grad(true); - + EXPECT_TRUE(tensor->IsCUDA()); EXPECT_EQ(tensor->Dims(), (std::vector{2, 8})); #endif diff --git a/tests/tensor/test_tensor_copy.cc b/tests/tensor/test_tensor_copy.cc new file mode 100644 index 00000000..452062c4 --- /dev/null +++ b/tests/tensor/test_tensor_copy.cc @@ -0,0 +1,123 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorCopyTest : public infini_train::test::TensorTestBase {}; + +static void FillSequential(const std::shared_ptr& tensor, float start = 0.0f) { + auto* data = static_cast(tensor->DataPtr()); + size_t n = 1; + for (auto dim : tensor->Dims()) { + n *= static_cast(dim); + } + for (size_t i = 0; i < n; ++i) { + data[i] = start + static_cast(i); + } +} + +TEST_F(TensorCopyTest, CopiesCPUToCPU) { + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(source, 1.0f); + + target->CopyFrom(source); + + auto* target_data = static_cast(target->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(target_data[i], 1.0f + static_cast(i)); + } +} + +TEST_F(TensorCopyTest, CopiesCPUToCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto cpu_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto cuda_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + + FillSequential(cpu_tensor, 0.0f); + cuda_tensor->CopyFrom(cpu_tensor); + + EXPECT_TRUE(cuda_tensor->IsCUDA()); +#endif +} + +TEST_F(TensorCopyTest, CopiesCUDAtoCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + FillSequential(source, 2.0f); + + target->CopyFrom(source); + + EXPECT_TRUE(target->IsCUDA()); +#endif +} + +TEST_F(TensorCopyTest, CopiesCUDAtoCPU) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto cuda_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto cpu_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + + FillSequential(cuda_tensor, 1.0f); + cpu_tensor->CopyFrom(cuda_tensor); + + EXPECT_FALSE(cpu_tensor->IsCUDA()); + EXPECT_TRUE(cpu_tensor->IsCPU()); +#endif +} + +TEST_F(TensorCopyTest, CopiesBetweenSameShape) { + auto source = std::make_shared(std::vector{4, 5, 6}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{4, 5, 6}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(source, 0.0f); + + target->CopyFrom(source); + + EXPECT_EQ(source->Dims(), target->Dims()); +} + +TEST_F(TensorCopyTest, CopiesPreservesDataType) { + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + + EXPECT_EQ(source->Dtype(), target->Dtype()); + target->CopyFrom(source); + EXPECT_EQ(target->Dtype(), DataType::kFLOAT32); +} + +TEST_F(TensorCopyTest, CopiesWithDifferentDeviceId) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 1)); + FillSequential(source, 5.0f); + + target->CopyFrom(source); + + EXPECT_EQ(source->GetDevice().index(), 0); + EXPECT_EQ(target->GetDevice().index(), 1); +#endif +} diff --git a/tests/tensor/test_tensor_create.cc b/tests/tensor/test_tensor_create.cc new file mode 100644 index 00000000..c39246e3 --- /dev/null +++ b/tests/tensor/test_tensor_create.cc @@ -0,0 +1,94 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorCreateTest : public infini_train::test::TensorTestBase {}; + +TEST_F(TensorCreateTest, CreatesCpuTensorWithShapeAndType) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); +} + +TEST_F(TensorCreateTest, TracksRequiresGrad) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->requires_grad()); +} + +TEST_F(TensorCreateTest, ProvidesDataPointer) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +} + +TEST_F(TensorCreateTest, SupportsMultipleShapes) { + std::vector> shapes = { + {2, 3}, + {4, 5, 6}, + {10}, + {1, 1, 1, 1} + }; + + for (const auto& shape : shapes) { + auto tensor = std::make_shared(shape, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dims(), shape); + } +} + +TEST_F(TensorCreateTest, SupportsMultipleDtypes) { + std::vector dtypes = { + DataType::kFLOAT32, + DataType::kBFLOAT16, + }; + + for (const auto& dtype : dtypes) { + auto tensor = std::make_shared(std::vector{2, 3}, dtype, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dtype(), dtype); + } +} + +TEST_F(TensorCreateTest, CreatesTensorOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); +#endif +} + +TEST_F(TensorCreateTest, TracksRequiresGradOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->requires_grad()); +#endif +} + +TEST_F(TensorCreateTest, ProvidesDataPointerOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +#endif +} diff --git a/tests/tensor/test_tensor_delete.cc b/tests/tensor/test_tensor_delete.cc new file mode 100644 index 00000000..e959b807 --- /dev/null +++ b/tests/tensor/test_tensor_delete.cc @@ -0,0 +1,104 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorDeleteTest : public infini_train::test::TensorTestBase {}; + +static void FillSequential(const std::shared_ptr& tensor, float start = 0.0f) { + auto* data = static_cast(tensor->DataPtr()); + size_t n = 1; + for (auto dim : tensor->Dims()) { + n *= static_cast(dim); + } + for (size_t i = 0; i < n; ++i) { + data[i] = start + static_cast(i); + } +} + +TEST_F(TensorDeleteTest, ReleasesResourcesOnReset) { + std::weak_ptr weak_tensor; + { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + tensor->set_requires_grad(true); + weak_tensor = tensor; + } + EXPECT_TRUE(weak_tensor.expired()); +} + +TEST_F(TensorDeleteTest, MoveTransferKeepsData) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(tensor, 5.0f); + + auto moved = std::move(tensor); + EXPECT_EQ(tensor, nullptr); + ASSERT_NE(moved, nullptr); + + auto* data = static_cast(moved->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(data[i], 5.0f + static_cast(i)); + } +} + +TEST_F(TensorDeleteTest, NullifiesPointerOnMove) { + auto tensor = std::make_shared(std::vector{3, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + + auto moved_tensor = std::move(tensor); + EXPECT_EQ(tensor, nullptr); + EXPECT_NE(moved_tensor, nullptr); +} + +TEST_F(TensorDeleteTest, SharedPtrRefCountOnCopy) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(tensor, 1.0f); + + auto copy1 = tensor; + auto copy2 = tensor; + + EXPECT_EQ(tensor.use_count(), 3); + EXPECT_EQ(copy1.use_count(), 3); + EXPECT_EQ(copy2.use_count(), 3); + + copy1.reset(); + EXPECT_EQ(tensor.use_count(), 2); + + copy2.reset(); + EXPECT_EQ(tensor.use_count(), 1); + + EXPECT_NE(tensor, nullptr); +} + +TEST_F(TensorDeleteTest, TensorDestroyedAfterScope) { + bool destroyed = false; + { + auto tensor = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + } +} + +TEST_F(TensorDeleteTest, ReleaseMemoryOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + std::weak_ptr weak_tensor; + { + auto tensor = std::make_shared(std::vector{100, 100}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->IsCUDA()); + weak_tensor = tensor; + } + EXPECT_TRUE(weak_tensor.expired()); +#endif +}