diff --git a/.gitmodules b/.gitmodules
index 470cf466..64299a70 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,9 +1,9 @@
 [submodule "third_party/glog"]
 	path = third_party/glog
-	url = git@github.com:google/glog.git
+	url = https://github.com/google/glog.git
 [submodule "third_party/gflags"]
 	path = third_party/gflags
-	url = git@github.com:gflags/gflags.git
+	url = https://github.com/gflags/gflags.git
 [submodule "third_party/eigen"]
 	path = third_party/eigen
-	url = git@github.com:InfiniTensor/eigen-mirror.git
+	url = https://github.com/eigenteam/eigen-git-mirror.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index df636b27..9cd2ff75 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,6 +4,7 @@ option(USE_CUDA "Support NVIDIA CUDA" OFF)
 option(PROFILE_MODE "ENABLE PROFILE MODE" OFF)
 option(USE_OMP "Use OpenMP as backend for Eigen" ON)
 option(USE_NCCL "Build project for distributed running" ON)
+option(BUILD_TEST "Build InfiniTrain tests" ON)
 
 project(infini_train VERSION 0.5.0 LANGUAGES CXX)
 
@@ -14,6 +15,21 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 # Generate compile_commands.json
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
+# ------------------------------------------------------------------------------
+# GoogleTest (FetchContent)
+# ------------------------------------------------------------------------------
+if(BUILD_TEST)
+    include(FetchContent)
+    FetchContent_Declare(
+        googletest
+        GIT_REPOSITORY https://github.com/google/googletest.git
+        GIT_TAG v1.14.0
+    )
+    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+    FetchContent_MakeAvailable(googletest)
+    enable_testing()
+endif()
+
 # ------------------------------------------------------------------------------
 # Third-party deps
 # ------------------------------------------------------------------------------
@@ -26,7 +42,9 @@ include_directories(${gflags_SOURCE_DIR}/include)
 set(WITH_GFLAGS OFF CACHE BOOL "Disable glog finding system gflags" FORCE)
 set(WITH_GTEST OFF CACHE BOOL "Disable glog finding system gtest" FORCE)
 add_subdirectory(third_party/glog)
+# add_compile_definitions(GLOG_USE_GLOG_EXPORT=1)
 include_directories(${glog_SOURCE_DIR}/src)
+# include_directories(${glog_BINARY_DIR}/glog)
 
 # eigen
 if(USE_OMP)
@@ -48,6 +66,10 @@ endif()
 # Framework core sources (*.cc), excluding cpu kernels (they are built separately)
 file(GLOB_RECURSE SRC ${PROJECT_SOURCE_DIR}/infini_train/src/*.cc)
 list(FILTER SRC EXCLUDE REGEX ".*kernels/cpu/.*")
+if(NOT USE_CUDA)
+    list(FILTER SRC EXCLUDE REGEX ".*runtime/cuda/.*")
+    list(FILTER SRC EXCLUDE REGEX ".*ccl/cuda/.*")
+endif()
 if(NOT USE_NCCL)
     list(FILTER SRC EXCLUDE REGEX ".*infini_train/src/core/ccl/cuda/.*")
 endif()
@@ -190,17 +212,8 @@ add_executable(llama3
 )
 link_infini_train_exe(llama3)
 
-# Tools
-add_subdirectory(tools/infini_run)
-set_target_properties(infini_run PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
 # Tests
-add_executable(test_hook test/hook/test_hook.cc)
-link_infini_train_exe(test_hook)
-
-add_executable(test_precision_check test/hook/test_precision_check.cc)
-link_infini_train_exe(test_precision_check)
-
-add_executable(test_lora test/lora/test_lora.cc)
-link_infini_train_exe(test_lora)
-
+if(BUILD_TEST)
+    add_subdirectory(tests)
+endif()
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 00000000..dae5f972
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Tests CMakeLists.txt
+# This file manages the test infrastructure for InfiniTrain
+
+# Include shared test macros (must be before any test subdirectory)
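+# (the subdirectories below register their tests via infini_train_add_test at
+# configure time, so the macro definitions have to be loaded first)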
+include(${CMAKE_CURRENT_SOURCE_DIR}/common/test_macros.cmake)
+
+# Common test utilities
+add_subdirectory(common)
+
+# Tensor tests
+add_subdirectory(tensor)
+
+# Optimizer tests
+add_subdirectory(optimizer)
+
+# Autograd operator tests
+add_subdirectory(autograd)
+
+# LoRA tests
+add_subdirectory(lora)
+
+# Hook tests
+add_subdirectory(hook)
+
+# Slow label tests
+add_subdirectory(slow)
diff --git a/tests/autograd/CMakeLists.txt b/tests/autograd/CMakeLists.txt
new file mode 100644
index 00000000..18a16fa2
--- /dev/null
+++ b/tests/autograd/CMakeLists.txt
@@ -0,0 +1,56 @@
+# ============================================================================
+# Autograd tests
+# ============================================================================
+# Refactored: test configuration goes through the infini_train_add_test macro.
+#
+# Adding a new test takes a single line:
+#   infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu)
+# ============================================================================
+
+# -----------------------------------------------------------------------------
+# Elementwise tests
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_autograd_elementwise_forward SOURCES test_autograd_elementwise_forward.cc LABELS cpu)
+infini_train_add_test(test_autograd_elementwise_backward SOURCES test_autograd_elementwise_backward.cc LABELS cpu)
+
+# -----------------------------------------------------------------------------
+# Matmul tests
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_autograd_matmul_forward SOURCES test_autograd_matmul_forward.cc LABELS cpu)
+infini_train_add_test(test_autograd_matmul_backward SOURCES test_autograd_matmul_backward.cc LABELS cpu)
+
+# -----------------------------------------------------------------------------
+# Reduction tests
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_autograd_reduction_forward SOURCES test_autograd_reduction_forward.cc LABELS cpu)
+infini_train_add_test(test_autograd_reduction_backward SOURCES test_autograd_reduction_backward.cc LABELS cpu)
+
+# -----------------------------------------------------------------------------
+# Linear tests
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_autograd_linear_forward SOURCES test_autograd_linear_forward.cc LABELS cpu)
+infini_train_add_test(test_autograd_linear_backward SOURCES test_autograd_linear_backward.cc LABELS cpu)
+
+# -----------------------------------------------------------------------------
+# Softmax tests
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_autograd_softmax_forward SOURCES test_autograd_softmax_forward.cc LABELS cpu)
+infini_train_add_test(test_autograd_softmax_backward SOURCES test_autograd_softmax_backward.cc LABELS cpu)
+
+# -----------------------------------------------------------------------------
+# Transform tests
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_autograd_transform_forward SOURCES test_autograd_transform_forward.cc LABELS cpu)
+infini_train_add_test(test_autograd_transform_backward SOURCES test_autograd_transform_backward.cc LABELS cpu)
+
+# -----------------------------------------------------------------------------
+# Normalization tests
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_autograd_normalization_forward SOURCES test_autograd_normalization_forward.cc LABELS cpu)
+infini_train_add_test(test_autograd_normalization_backward SOURCES test_autograd_normalization_backward.cc LABELS cpu)
+
+# -----------------------------------------------------------------------------
+# Legacy combined tests
+# Note: with gtest_discover_tests, every TEST_F is discovered automatically
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_autograd_legacy SOURCES test_autograd.cc LABELS cpu cuda distributed)
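+
+# gtest_discover_tests registers each TEST_F in the legacy binary as its own
+# ctest entry (e.g. AutogradForwardTest.AddForward), so the combined file still
+# produces fine-grained test results.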
diff --git a/tests/autograd/test_autograd.cc b/tests/autograd/test_autograd.cc
new file mode 100644
index 00000000..b46358da
--- /dev/null
+++ b/tests/autograd/test_autograd.cc
@@ -0,0 +1,557 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <vector>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/transform.h"
+#include "infini_train/include/autograd/elementwise.h"
+#include "infini_train/include/autograd/matmul.h"
+#include "infini_train/include/autograd/reduction.h"
+#include "infini_train/include/autograd/activations.h"
+#include "infini_train/include/autograd/softmax.h"
+#include "infini_train/include/autograd/normalization.h"
+#include "infini_train/include/autograd/linear.h"
+#include "infini_train/include/autograd/outer.h"
+#include "infini_train/include/autograd/misc.h"
+
+using namespace infini_train;
+
+class AutogradTestBase : public ::testing::Test {
+protected:
+    static void SetUpTestSuite() {
+        nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1);
+    }
+
+    std::shared_ptr<Tensor> createTensor(const std::vector<int64_t> &shape, float value = 0.0f) {
+        auto tensor = std::make_shared<Tensor>(shape, DataType::kFLOAT32,
+                                               Device(Device::DeviceType::kCPU, 0));
+        tensor->set_requires_grad(true);
+        auto data = static_cast<float *>(tensor->DataPtr());
+        size_t size = 1;
+        for (auto dim : shape) size *= dim;
+        for (size_t i = 0; i < size; ++i) {
+            data[i] = value + static_cast<float>(i);
+        }
+        return tensor;
+    }
+};
+
+class AutogradForwardTest : public AutogradTestBase {};
+class AutogradBackwardTest : public AutogradTestBase {};
+class AutogradCudaTest : public AutogradTestBase {};
+class AutogradDistributedTest : public AutogradTestBase {};
+
+TEST_F(AutogradForwardTest, AddForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto b = createTensor({2, 3}, 2.0f);
+    auto add_fn = std::make_shared<autograd::Add>();
+    auto result = add_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3}));
+}
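+
+// A value-level sketch to complement the shape checks above. It assumes the
+// CPU result of Apply() is materialized synchronously and is host-readable via
+// DataPtr(), as createTensor() already assumes for the inputs.
+TEST_F(AutogradForwardTest, AddForwardValues) {
+    auto a = createTensor({2, 3}, 1.0f); // a[i] = 1 + i
+    auto b = createTensor({2, 3}, 2.0f); // b[i] = 2 + i
+    auto add_fn = std::make_shared<autograd::Add>();
+    auto result = add_fn->Apply({a, b});
+    ASSERT_EQ(result.size(), 1);
+    auto out = static_cast<const float *>(result[0]->DataPtr());
+    for (int i = 0; i < 6; ++i) {
+        EXPECT_FLOAT_EQ(out[i], 3.0f + 2.0f * i); // (1 + i) + (2 + i)
+    }
+}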
+
+TEST_F(AutogradBackwardTest, AddBackward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto b = createTensor({2, 3}, 2.0f);
+    auto add_fn = std::make_shared<autograd::Add>();
+    auto result = add_fn->Apply({a, b});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = add_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 2);
+}
+
+TEST_F(AutogradForwardTest, SubForward) {
+    auto a = createTensor({2, 3}, 5.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto sub_fn = std::make_shared<autograd::Sub>();
+    auto result = sub_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, MulForward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto mul_fn = std::make_shared<autograd::Mul>();
+    auto result = mul_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradBackwardTest, MulBackward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto mul_fn = std::make_shared<autograd::Mul>();
+    auto result = mul_fn->Apply({a, b});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = mul_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 2);
+}
+
+TEST_F(AutogradForwardTest, DivForward) {
+    auto a = createTensor({2, 3}, 6.0f);
+    auto b = createTensor({2, 3}, 2.0f);
+    auto div_fn = std::make_shared<autograd::Div>();
+    auto result = div_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, NegForward) {
+    auto a = createTensor({2, 3}, 5.0f);
+    auto neg_fn = std::make_shared<autograd::Neg>();
+    auto result = neg_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, SinForward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto sin_fn = std::make_shared<autograd::Sin>();
+    auto result = sin_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, CosForward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto cos_fn = std::make_shared<autograd::Cos>();
+    auto result = cos_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, TanhForward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto tanh_fn = std::make_shared<autograd::Tanh>();
+    auto result = tanh_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, ExpForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto exp_fn = std::make_shared<autograd::Exp>();
+    auto result = exp_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, LogForward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto log_fn = std::make_shared<autograd::Log>();
+    auto result = log_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, ReciprocalForward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto reciprocal_fn = std::make_shared<autograd::Reciprocal>();
+    auto result = reciprocal_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, PowForward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto pow_fn = std::make_shared<autograd::Pow>(2.0f);
+    auto result = pow_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, RsqrtForward) {
+    auto a = createTensor({2, 3}, 4.0f);
+    auto rsqrt_fn = std::make_shared<autograd::Rsqrt>();
+    auto result = rsqrt_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, SigmoidForward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto sigmoid_fn = std::make_shared<autograd::Sigmoid>();
+    auto result = sigmoid_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, MatmulForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto b = createTensor({3, 4}, 1.0f);
+    auto matmul_fn = std::make_shared<autograd::Matmul>();
+    auto result = matmul_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4}));
+}
+
+TEST_F(AutogradForwardTest, SumForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto sum_fn = std::make_shared<autograd::Sum>(1, false);
+    auto result = sum_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, MeanForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto mean_fn = std::make_shared<autograd::Mean>(1, false);
+    auto result = mean_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, MaxForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto max_fn = std::make_shared<autograd::Max>(1, false);
+    auto result = max_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, MinForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto min_fn = std::make_shared<autograd::Min>(1, false);
+    auto result = min_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, SoftmaxForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto softmax_fn = std::make_shared<autograd::Softmax>(1);
+    auto result = softmax_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3}));
+}
+
+TEST_F(AutogradForwardTest, LayerNormForward) {
+    auto a = createTensor({2, 3, 4}, 1.0f);
+    auto weight = createTensor({4}, 1.0f);
+    auto bias = createTensor({4}, 0.0f);
+    auto layernorm_fn = std::make_shared<autograd::LayerNorm>(1e-5f);
+    auto result = layernorm_fn->Apply({a, weight, bias});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, LinearForward) {
+    auto input = createTensor({2, 3}, 1.0f);
+    auto weight = createTensor({4, 3}, 1.0f);
+    auto bias = createTensor({4}, 0.0f);
+    auto linear_fn = std::make_shared<autograd::Linear>();
+    auto result = linear_fn->Apply({input, weight, bias});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4}));
+}
+
+TEST_F(AutogradForwardTest, TransposeForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto transpose_fn = std::make_shared<autograd::Transpose>(0, 1);
+    auto result = transpose_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{3, 2}));
+}
+
+TEST_F(AutogradForwardTest, SliceForward) {
+    auto a = createTensor({4, 4}, 1.0f);
+    auto slice_fn = std::make_shared<autograd::Slice>(
+        std::vector<int64_t>{1, 1},
+        std::vector<int64_t>{3, 3},
+        std::vector<int64_t>{1, 1});
+    auto result = slice_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, SplitForward) {
+    auto a = createTensor({4, 4}, 1.0f);
+    auto split_fn = std::make_shared<autograd::Split>(2, 0);
+    auto result = split_fn->Apply({a});
+    EXPECT_EQ(result.size(), 2);
+}
+
+TEST_F(AutogradForwardTest, ConcatForward) {
+    auto a = createTensor({2, 2}, 1.0f);
+    auto b = createTensor({2, 2}, 2.0f);
+    auto concat_fn = std::make_shared<autograd::Concat>(0);
+    auto result = concat_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{4, 2}));
+}
+
+TEST_F(AutogradForwardTest, StackForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto b = createTensor({2, 3}, 2.0f);
+    auto stack_fn = std::make_shared<autograd::Stack>(0);
+    auto result = stack_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 2, 3}));
+}
+
+TEST_F(AutogradForwardTest, TrilForward) {
+    auto a = createTensor({3, 3}, 1.0f);
+    auto tril_fn = std::make_shared<autograd::Tril>(0);
+    auto result = tril_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, TriuForward) {
+    auto a = createTensor({3, 3}, 1.0f);
+    auto triu_fn = std::make_shared<autograd::Triu>(0);
+    auto result = triu_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, OuterForward) {
+    auto a = createTensor({3}, 1.0f);
+    auto b = createTensor({4}, 1.0f);
+    auto outer_fn = std::make_shared<autograd::Outer>();
+    auto result = outer_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{3, 4}));
+}
+
+TEST_F(AutogradForwardTest, AddScalarForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto add_scalar_fn = std::make_shared<autograd::AddScalar>(2.0f);
+    auto result = add_scalar_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, MulScalarForward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto mul_scalar_fn = std::make_shared<autograd::MulScalar>(3.0f);
+    auto result = mul_scalar_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, LtForward) {
+    auto a = createTensor({2, 3}, 5.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto lt_fn = std::make_shared<autograd::Lt>();
+    auto result = lt_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, LeForward) {
+    auto a = createTensor({2, 3}, 3.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto le_fn = std::make_shared<autograd::Le>();
+    auto result = le_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, GtForward) {
+    auto a = createTensor({2, 3}, 5.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto gt_fn = std::make_shared<autograd::Gt>();
+    auto result = gt_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, GeForward) {
+    auto a = createTensor({2, 3}, 3.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto ge_fn = std::make_shared<autograd::Ge>();
+    auto result = ge_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, EqualsForward) {
+    auto a = createTensor({2, 3}, 3.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto eq_fn = std::make_shared<autograd::Equals>();
+    auto result = eq_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, AndForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto b = createTensor({2, 3}, 1.0f);
+    auto and_fn = std::make_shared<autograd::And>();
+    auto result = and_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, OrForward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto b = createTensor({2, 3}, 1.0f);
+    auto or_fn = std::make_shared<autograd::Or>();
+    auto result = or_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradForwardTest, NoOpForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto noop_fn = std::make_shared<autograd::NoOp>(std::vector<int64_t>{2, 3});
+    auto result = noop_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3}));
+}
+
+// ============================================================================
+// CUDA Tests - require CUDA build and GPU
+// ============================================================================
+
+#ifdef USE_CUDA
+TEST_F(AutogradCudaTest, AddForwardCUDA) {
+    auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
+                                      Device(Device::DeviceType::kCUDA, 0));
+    a->set_requires_grad(true);
+    auto a_data = static_cast<float *>(a->DataPtr());
+    for (int i = 0; i < 6; ++i) a_data[i] = 1.0f;
+
+    auto b = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
+                                      Device(Device::DeviceType::kCUDA, 0));
+    b->set_requires_grad(true);
+    auto b_data = static_cast<float *>(b->DataPtr());
+    for (int i = 0; i < 6; ++i) b_data[i] = 2.0f;
+
+    auto add_fn = std::make_shared<autograd::Add>();
+    auto result = add_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3}));
+}
+
+TEST_F(AutogradCudaTest, MatmulForwardCUDA) {
+    auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
+                                      Device(Device::DeviceType::kCUDA, 0));
+    a->set_requires_grad(true);
+    auto a_data = static_cast<float *>(a->DataPtr());
+    for (int i = 0; i < 6; ++i) a_data[i] = 1.0f;
+
+    auto b = std::make_shared<Tensor>(std::vector<int64_t>{3, 4}, DataType::kFLOAT32,
+                                      Device(Device::DeviceType::kCUDA, 0));
+    b->set_requires_grad(true);
+    auto b_data = static_cast<float *>(b->DataPtr());
+    for (int i = 0; i < 12; ++i) b_data[i] = 1.0f;
+
+    auto matmul_fn = std::make_shared<autograd::Matmul>();
+    auto result = matmul_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4}));
+}
+
+TEST_F(AutogradCudaTest, SumForwardCUDA) {
+    auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
+                                      Device(Device::DeviceType::kCUDA, 0));
+    a->set_requires_grad(true);
+    auto a_data = static_cast<float *>(a->DataPtr());
+    for (int i = 0; i < 6; ++i) a_data[i] = 1.0f;
+
+    auto sum_fn = std::make_shared<autograd::Sum>(1, false);
+    auto result = sum_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradCudaTest, SoftmaxForwardCUDA) {
+    auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
+                                      Device(Device::DeviceType::kCUDA, 0));
+    a->set_requires_grad(true);
+    auto a_data = static_cast<float *>(a->DataPtr());
+    for (int i = 0; i < 6; ++i) a_data[i] = 1.0f;
+
+    auto softmax_fn = std::make_shared<autograd::Softmax>(1);
+    auto result = softmax_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3}));
+}
+
+TEST_F(AutogradCudaTest, LinearForwardCUDA) {
+    auto input = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
+                                          Device(Device::DeviceType::kCUDA, 0));
+    input->set_requires_grad(true);
+    auto input_data = static_cast<float *>(input->DataPtr());
+    for (int i = 0; i < 6; ++i) input_data[i] = 1.0f;
+
+    auto weight = std::make_shared<Tensor>(std::vector<int64_t>{4, 3}, DataType::kFLOAT32,
+                                           Device(Device::DeviceType::kCUDA, 0));
+    weight->set_requires_grad(true);
+    auto weight_data = static_cast<float *>(weight->DataPtr());
+    for (int i = 0; i < 12; ++i) weight_data[i] = 1.0f;
+
+    auto bias = std::make_shared<Tensor>(std::vector<int64_t>{4}, DataType::kFLOAT32,
+                                         Device(Device::DeviceType::kCUDA, 0));
+    bias->set_requires_grad(true);
+    auto bias_data = static_cast<float *>(bias->DataPtr());
+    for (int i = 0; i < 4; ++i) bias_data[i] = 0.0f;
+
+    auto linear_fn = std::make_shared<autograd::Linear>();
+    auto result = linear_fn->Apply({input, weight, bias});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4}));
+}
+#endif // USE_CUDA
+
+// ============================================================================
+// Distributed Tests - require CUDA + NCCL
+// ============================================================================
+
+#ifdef USE_NCCL
+TEST_F(AutogradDistributedTest, AllReduceDistributed) {
+    REQUIRE_CUDA();
+    REQUIRE_DISTRIBUTED();
+    REQUIRE_NCCL();
+    auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
+                                      Device(Device::DeviceType::kCUDA, 0));
+    a->set_requires_grad(true);
+    auto a_data = static_cast<float *>(a->DataPtr());
+    for (int i = 0; i < 6; ++i) a_data[i] = 1.0f;
+
+    EXPECT_TRUE(a->IsCUDA());
+    EXPECT_TRUE(a->requires_grad());
+}
+
+TEST_F(AutogradDistributedTest, AllGatherDistributed) {
+    REQUIRE_CUDA();
+    REQUIRE_DISTRIBUTED();
+    REQUIRE_NCCL();
+    auto a = std::make_shared<Tensor>(std::vector<int64_t>{4, 4}, DataType::kFLOAT32,
+                                      Device(Device::DeviceType::kCUDA, 0));
+    a->set_requires_grad(true);
+    auto a_data = static_cast<float *>(a->DataPtr());
+    for (int i = 0; i < 16; ++i) a_data[i] = 1.0f;
+
+    EXPECT_TRUE(a->IsCUDA());
+    EXPECT_EQ(a->Dims(), (std::vector<int64_t>{4, 4}));
+}
+
+TEST_F(AutogradDistributedTest, ReduceScatterDistributed) {
+    REQUIRE_CUDA();
+    REQUIRE_DISTRIBUTED();
+    REQUIRE_NCCL();
+    auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 8}, DataType::kFLOAT32,
+                                      Device(Device::DeviceType::kCUDA, 0));
+    a->set_requires_grad(true);
+    auto a_data = static_cast<float *>(a->DataPtr());
+    for (int i = 0; i < 16; ++i) a_data[i] = 1.0f;
+
+    EXPECT_TRUE(a->IsCUDA());
+    EXPECT_EQ(a->Dims(), (std::vector<int64_t>{2, 8}));
+}
+
+TEST_F(AutogradDistributedTest, DistributedMatmul) {
+    REQUIRE_CUDA();
+    REQUIRE_DISTRIBUTED();
+    REQUIRE_NCCL();
+    auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 4}, DataType::kFLOAT32,
+                                      Device(Device::DeviceType::kCUDA, 0));
+    a->set_requires_grad(true);
+    auto b = std::make_shared<Tensor>(std::vector<int64_t>{4, 2}, DataType::kFLOAT32,
+                                      Device(Device::DeviceType::kCUDA, 0));
+    b->set_requires_grad(true);
+
+    auto matmul_fn = std::make_shared<autograd::Matmul>();
+    auto result = matmul_fn->Apply({a, b});
+
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_TRUE(result[0]->IsCUDA());
+}
+
+TEST_F(AutogradDistributedTest, DistributedLinear) {
+    REQUIRE_CUDA();
+    REQUIRE_DISTRIBUTED();
+    REQUIRE_NCCL();
+    auto input = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
+                                          Device(Device::DeviceType::kCUDA, 0));
+    input->set_requires_grad(true);
+    auto weight = std::make_shared<Tensor>(std::vector<int64_t>{4, 3}, DataType::kFLOAT32,
+                                           Device(Device::DeviceType::kCUDA, 0));
+    weight->set_requires_grad(true);
+    auto bias = std::make_shared<Tensor>(std::vector<int64_t>{4}, DataType::kFLOAT32,
+                                         Device(Device::DeviceType::kCUDA, 0));
+    bias->set_requires_grad(true);
+
+    auto linear_fn = std::make_shared<autograd::Linear>();
+    auto result = linear_fn->Apply({input, weight, bias});
+
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4}));
+    EXPECT_TRUE(result[0]->IsCUDA());
+}
+#endif // USE_NCCL
diff --git a/tests/autograd/test_autograd_elementwise_backward.cc b/tests/autograd/test_autograd_elementwise_backward.cc
new file mode 100644
index 00000000..502a20e5
--- /dev/null
+++ b/tests/autograd/test_autograd_elementwise_backward.cc
@@ -0,0 +1,134 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <vector>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/elementwise.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradElementwiseBackwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradElementwiseBackwardTest, AddBackward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto b = createTensor({2, 3}, 2.0f);
+    auto add_fn = std::make_shared<autograd::Add>();
+    auto result = add_fn->Apply({a, b});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = add_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 2);
+}
+
+TEST_F(AutogradElementwiseBackwardTest, SubBackward) {
+    auto a = createTensor({2, 3}, 5.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto sub_fn = std::make_shared<autograd::Sub>();
+    auto result = sub_fn->Apply({a, b});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = sub_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 2);
+}
+
+TEST_F(AutogradElementwiseBackwardTest, MulBackward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto mul_fn = std::make_shared<autograd::Mul>();
+    auto result = mul_fn->Apply({a, b});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = mul_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 2);
+}
+
+TEST_F(AutogradElementwiseBackwardTest, DivBackward) {
+    auto a = createTensor({2, 3}, 6.0f);
+    auto b = createTensor({2, 3}, 2.0f);
+    auto div_fn = std::make_shared<autograd::Div>();
+    auto result = div_fn->Apply({a, b});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = div_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 2);
+}
+
+TEST_F(AutogradElementwiseBackwardTest, NegBackward) {
+    auto a = createTensor({2, 3}, 5.0f);
+    auto neg_fn = std::make_shared<autograd::Neg>();
+    auto result = neg_fn->Apply({a});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = neg_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradElementwiseBackwardTest, SinBackward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto sin_fn = std::make_shared<autograd::Sin>();
+    auto result = sin_fn->Apply({a});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = sin_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradElementwiseBackwardTest, CosBackward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto cos_fn = std::make_shared<autograd::Cos>();
+    auto result = cos_fn->Apply({a});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = cos_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
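+// A value-level sketch: for y = -x the input gradient is -grad. It assumes
+// Backward() returns materialized CPU tensors whose DataPtr() is host-readable.
+TEST_F(AutogradElementwiseBackwardTest, NegBackwardValues) {
+    auto a = createTensor({2, 3}, 5.0f);
+    auto neg_fn = std::make_shared<autograd::Neg>();
+    auto result = neg_fn->Apply({a});
+    auto grad = createTensor({2, 3}, 1.0f); // grad[i] = 1 + i
+    auto grad_inputs = neg_fn->Backward({grad});
+    ASSERT_EQ(grad_inputs.size(), 1);
+    auto g = static_cast<const float *>(grad_inputs[0]->DataPtr());
+    for (int i = 0; i < 6; ++i) {
+        EXPECT_FLOAT_EQ(g[i], -(1.0f + i));
+    }
+}
+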
+TEST_F(AutogradElementwiseBackwardTest, TanhBackward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto tanh_fn = std::make_shared<autograd::Tanh>();
+    auto result = tanh_fn->Apply({a});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = tanh_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradElementwiseBackwardTest, ExpBackward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto exp_fn = std::make_shared<autograd::Exp>();
+    auto result = exp_fn->Apply({a});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = exp_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradElementwiseBackwardTest, LogBackward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto log_fn = std::make_shared<autograd::Log>();
+    auto result = log_fn->Apply({a});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = log_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradElementwiseBackwardTest, ReciprocalBackward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto reciprocal_fn = std::make_shared<autograd::Reciprocal>();
+    auto result = reciprocal_fn->Apply({a});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = reciprocal_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradElementwiseBackwardTest, PowBackward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto pow_fn = std::make_shared<autograd::Pow>(2.0f);
+    auto result = pow_fn->Apply({a});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = pow_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradElementwiseBackwardTest, RsqrtBackward) {
+    auto a = createTensor({2, 3}, 4.0f);
+    auto rsqrt_fn = std::make_shared<autograd::Rsqrt>();
+    auto result = rsqrt_fn->Apply({a});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = rsqrt_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
diff --git a/tests/autograd/test_autograd_elementwise_forward.cc b/tests/autograd/test_autograd_elementwise_forward.cc
new file mode 100644
index 00000000..63b386b1
--- /dev/null
+++ b/tests/autograd/test_autograd_elementwise_forward.cc
@@ -0,0 +1,187 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <vector>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/elementwise.h"
+#include "infini_train/include/autograd/activations.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradElementwiseForwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradElementwiseForwardTest, AddForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto b = createTensor({2, 3}, 2.0f);
+    auto add_fn = std::make_shared<autograd::Add>();
+    auto result = add_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3}));
+}
+
+TEST_F(AutogradElementwiseForwardTest, SubForward) {
+    auto a = createTensor({2, 3}, 5.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto sub_fn = std::make_shared<autograd::Sub>();
+    auto result = sub_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, MulForward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto mul_fn = std::make_shared<autograd::Mul>();
+    auto result = mul_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, DivForward) {
+    auto a = createTensor({2, 3}, 6.0f);
+    auto b = createTensor({2, 3}, 2.0f);
+    auto div_fn = std::make_shared<autograd::Div>();
+    auto result = div_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, NegForward) {
+    auto a = createTensor({2, 3}, 5.0f);
+    auto neg_fn = std::make_shared<autograd::Neg>();
+    auto result = neg_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, SinForward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto sin_fn = std::make_shared<autograd::Sin>();
+    auto result = sin_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, CosForward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto cos_fn = std::make_shared<autograd::Cos>();
+    auto result = cos_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, TanhForward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto tanh_fn = std::make_shared<autograd::Tanh>();
+    auto result = tanh_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, ExpForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto exp_fn = std::make_shared<autograd::Exp>();
+    auto result = exp_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, LogForward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto log_fn = std::make_shared<autograd::Log>();
+    auto result = log_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, ReciprocalForward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto reciprocal_fn = std::make_shared<autograd::Reciprocal>();
+    auto result = reciprocal_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, PowForward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto pow_fn = std::make_shared<autograd::Pow>(2.0f);
+    auto result = pow_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, RsqrtForward) {
+    auto a = createTensor({2, 3}, 4.0f);
+    auto rsqrt_fn = std::make_shared<autograd::Rsqrt>();
+    auto result = rsqrt_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, SigmoidForward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto sigmoid_fn = std::make_shared<autograd::Sigmoid>();
+    auto result = sigmoid_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, AddScalarForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto add_scalar_fn = std::make_shared<autograd::AddScalar>(2.0f);
+    auto result = add_scalar_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, MulScalarForward) {
+    auto a = createTensor({2, 3}, 2.0f);
+    auto mul_scalar_fn = std::make_shared<autograd::MulScalar>(3.0f);
+    auto result = mul_scalar_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, LtForward) {
+    auto a = createTensor({2, 3}, 5.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto lt_fn = std::make_shared<autograd::Lt>();
+    auto result = lt_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, LeForward) {
+    auto a = createTensor({2, 3}, 3.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto le_fn = std::make_shared<autograd::Le>();
+    auto result = le_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, GtForward) {
+    auto a = createTensor({2, 3}, 5.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto gt_fn = std::make_shared<autograd::Gt>();
+    auto result = gt_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, GeForward) {
+    auto a = createTensor({2, 3}, 3.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto ge_fn = std::make_shared<autograd::Ge>();
+    auto result = ge_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, EqualsForward) {
+    auto a = createTensor({2, 3}, 3.0f);
+    auto b = createTensor({2, 3}, 3.0f);
+    auto eq_fn = std::make_shared<autograd::Equals>();
+    auto result = eq_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, AndForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto b = createTensor({2, 3}, 1.0f);
+    auto and_fn = std::make_shared<autograd::And>();
+    auto result = and_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradElementwiseForwardTest, OrForward) {
+    auto a = createTensor({2, 3}, 0.0f);
+    auto b = createTensor({2, 3}, 1.0f);
+    auto or_fn = std::make_shared<autograd::Or>();
+    auto result = or_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+}
diff --git a/tests/autograd/test_autograd_linear_backward.cc b/tests/autograd/test_autograd_linear_backward.cc
new file mode 100644
index 00000000..069affc7
--- /dev/null
+++ b/tests/autograd/test_autograd_linear_backward.cc
@@ -0,0 +1,33 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/linear.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradLinearBackwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradLinearBackwardTest, LinearBackward) {
+    auto input = createTensor({2, 3}, 1.0f);
+    auto weight = createTensor({4, 3}, 1.0f);
+    auto bias = createTensor({4}, 0.0f);
+    auto linear_fn = std::make_shared<autograd::Linear>();
+    auto result = linear_fn->Apply({input, weight, bias});
+    auto grad = createTensor({2, 4}, 1.0f);
+    auto grad_inputs = linear_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 3);
+}
+
+TEST_F(AutogradLinearBackwardTest, LinearBackwardNoBias) {
+    auto input = createTensor({2, 3}, 1.0f);
+    auto weight = createTensor({4, 3}, 1.0f);
+    auto linear_fn = std::make_shared<autograd::Linear>();
+    auto result = linear_fn->Apply({input, weight});
+    auto grad = createTensor({2, 4}, 1.0f);
+    auto grad_inputs = linear_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 2);
+}
diff --git a/tests/autograd/test_autograd_linear_forward.cc b/tests/autograd/test_autograd_linear_forward.cc
new file mode 100644
index 00000000..efd8d6eb
--- /dev/null
+++ b/tests/autograd/test_autograd_linear_forward.cc
@@ -0,0 +1,41 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/linear.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradLinearForwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradLinearForwardTest, LinearForward) {
+    auto input = createTensor({2, 3}, 1.0f);
+    auto weight = createTensor({4, 3}, 1.0f);
+    auto bias = createTensor({4}, 0.0f);
+    auto linear_fn = std::make_shared<autograd::Linear>();
+    auto result = linear_fn->Apply({input, weight, bias});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4}));
+}
+
+TEST_F(AutogradLinearForwardTest, LinearNoBias) {
+    auto input = createTensor({2, 3}, 1.0f);
+    auto weight = createTensor({4, 3}, 1.0f);
+    auto linear_fn = std::make_shared<autograd::Linear>();
+    auto result = linear_fn->Apply({input, weight});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4}));
+}
+
+TEST_F(AutogradLinearForwardTest, LinearBatch) {
+    auto input = createTensor({32, 128}, 1.0f);
+    auto weight = createTensor({64, 128}, 1.0f);
+    auto bias = createTensor({64}, 0.0f);
+    auto linear_fn = std::make_shared<autograd::Linear>();
+    auto result = linear_fn->Apply({input, weight, bias});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{32, 64}));
+}
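+
+// Note on the shapes above: the weight is laid out as {out_features, in_features}
+// (PyTorch-style), so the op effectively computes y = x * W^T + b, which is why
+// a {2, 3} input with a {4, 3} weight yields a {2, 4} output.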
diff --git a/tests/autograd/test_autograd_matmul_backward.cc b/tests/autograd/test_autograd_matmul_backward.cc
new file mode 100644
index 00000000..e9962f5d
--- /dev/null
+++ b/tests/autograd/test_autograd_matmul_backward.cc
@@ -0,0 +1,42 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/matmul.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradMatmulBackwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradMatmulBackwardTest, MatmulBackward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto b = createTensor({3, 4}, 1.0f);
+    auto matmul_fn = std::make_shared<autograd::Matmul>();
+    auto result = matmul_fn->Apply({a, b});
+    auto grad = createTensor({2, 4}, 1.0f);
+    auto grad_inputs = matmul_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 2);
+}
+
+TEST_F(AutogradMatmulBackwardTest, MatmulBackwardSquare) {
+    auto a = createTensor({3, 3}, 2.0f);
+    auto b = createTensor({3, 3}, 3.0f);
+    auto matmul_fn = std::make_shared<autograd::Matmul>();
+    auto result = matmul_fn->Apply({a, b});
+    auto grad = createTensor({3, 3}, 1.0f);
+    auto grad_inputs = matmul_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 2);
+}
+
+TEST_F(AutogradMatmulBackwardTest, MatmulBackwardDifferentShapes) {
+    auto a = createTensor({3, 4}, 1.5f);
+    auto b = createTensor({4, 2}, 2.5f);
+    auto matmul_fn = std::make_shared<autograd::Matmul>();
+    auto result = matmul_fn->Apply({a, b});
+    auto grad = createTensor({3, 2}, 1.0f);
+    auto grad_inputs = matmul_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 2);
+}
diff --git a/tests/autograd/test_autograd_matmul_forward.cc b/tests/autograd/test_autograd_matmul_forward.cc
new file mode 100644
index 00000000..87c93f08
--- /dev/null
+++ b/tests/autograd/test_autograd_matmul_forward.cc
@@ -0,0 +1,48 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/matmul.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradMatmulForwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradMatmulForwardTest, MatmulForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto b = createTensor({3, 4}, 1.0f);
+    auto matmul_fn = std::make_shared<autograd::Matmul>();
+    auto result = matmul_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4}));
+}
+
+TEST_F(AutogradMatmulForwardTest, MatmulDifferentShapes) {
+    auto a = createTensor({3, 4}, 1.0f);
+    auto b = createTensor({4, 2}, 1.0f);
+    auto matmul_fn = std::make_shared<autograd::Matmul>();
+    auto result = matmul_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{3, 2}));
+}
+
+TEST_F(AutogradMatmulForwardTest, MatmulBatch) {
+    auto a = createTensor({2, 3, 4}, 1.0f);
+    auto b = createTensor({2, 4, 5}, 1.0f);
+    auto matmul_fn = std::make_shared<autograd::Matmul>();
+    auto result = matmul_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3, 5}));
+}
+
+TEST_F(AutogradMatmulForwardTest, MatmulSquare) {
+    auto a = createTensor({3, 3}, 1.0f);
+    auto b = createTensor({3, 3}, 1.0f);
+    auto matmul_fn = std::make_shared<autograd::Matmul>();
+    auto result = matmul_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{3, 3}));
+}
diff --git a/tests/autograd/test_autograd_normalization_backward.cc b/tests/autograd/test_autograd_normalization_backward.cc
new file mode 100644
index 00000000..6f97349e
--- /dev/null
+++ b/tests/autograd/test_autograd_normalization_backward.cc
@@ -0,0 +1,34 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/normalization.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradNormalizationBackwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradNormalizationBackwardTest, LayerNormBackward) {
+    auto a = createTensor({2, 3, 4}, 1.0f);
+    auto weight = createTensor({4}, 1.0f);
+    auto bias = createTensor({4}, 0.0f);
+    auto layernorm_fn = std::make_shared<autograd::LayerNorm>(1e-5f);
+    auto result = layernorm_fn->Apply({a, weight, bias});
+    auto grad = createTensor({2, 3, 4}, 1.0f);
+    auto grad_inputs = layernorm_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 3);
+}
+
+TEST_F(AutogradNormalizationBackwardTest, LayerNormBackwardZeroBias) {
+    auto a = createTensor({2, 3, 4}, 1.0f);
+    auto weight = createTensor({4}, 1.0f);
+    auto bias = createTensor({4}, 0.0f);
+    auto layernorm_fn = std::make_shared<autograd::LayerNorm>(1e-5f);
+    auto result = layernorm_fn->Apply({a, weight, bias});
+    auto grad = createTensor({2, 3, 4}, 1.0f);
+    auto grad_inputs = layernorm_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 3);
+}
diff --git a/tests/autograd/test_autograd_normalization_forward.cc b/tests/autograd/test_autograd_normalization_forward.cc
new file mode 100644
index 00000000..d58fd749
--- /dev/null
+++ b/tests/autograd/test_autograd_normalization_forward.cc
@@ -0,0 +1,40 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/normalization.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradNormalizationForwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradNormalizationForwardTest, LayerNormForward) {
+    auto a = createTensor({2, 3, 4}, 1.0f);
+    auto weight = createTensor({4}, 1.0f);
+    auto bias = createTensor({4}, 0.0f);
+    auto layernorm_fn = std::make_shared<autograd::LayerNorm>(1e-5f);
+    auto result = layernorm_fn->Apply({a, weight, bias});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradNormalizationForwardTest, LayerNormZeroBias) {
+    auto a = createTensor({2, 3, 4}, 1.0f);
+    auto weight = createTensor({4}, 1.0f);
+    auto bias = createTensor({4}, 0.0f);
+    auto layernorm_fn = std::make_shared<autograd::LayerNorm>(1e-5f);
+    auto result = layernorm_fn->Apply({a, weight, bias});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradNormalizationForwardTest, LayerNormThreeDim) {
+    auto a = createTensor({2, 1, 4}, 1.0f);
+    auto weight = createTensor({4}, 1.0f);
+    auto bias = createTensor({4}, 0.0f);
+    auto layernorm_fn = std::make_shared<autograd::LayerNorm>(1e-5f);
+    auto result = layernorm_fn->Apply({a, weight, bias});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 1, 4}));
+}
diff --git a/tests/autograd/test_autograd_reduction_backward.cc b/tests/autograd/test_autograd_reduction_backward.cc
new file mode 100644
index 00000000..d212a065
--- /dev/null
+++ b/tests/autograd/test_autograd_reduction_backward.cc
@@ -0,0 +1,66 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/reduction.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradReductionBackwardTest : public infini_train::test::AutogradTestBase {};
+
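+// Shape conventions exercised below: reducing dim 1 of a {2, 3} input yields
+// {2} with keepdim=false and {2, 1} with keepdim=true, so the incoming
+// gradient has to match the reduced shape.
+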
+TEST_F(AutogradReductionBackwardTest, SumBackward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto sum_fn = std::make_shared<autograd::Sum>(1, false);
+    auto result = sum_fn->Apply({a});
+    auto grad = createTensor({2}, 1.0f);
+    auto grad_inputs = sum_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradReductionBackwardTest, MeanBackward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto mean_fn = std::make_shared<autograd::Mean>(1, false);
+    auto result = mean_fn->Apply({a});
+    auto grad = createTensor({2}, 1.0f);
+    auto grad_inputs = mean_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradReductionBackwardTest, MaxBackward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto max_fn = std::make_shared<autograd::Max>(1, false);
+    auto result = max_fn->Apply({a});
+    auto grad = createTensor({2}, 1.0f);
+    auto grad_inputs = max_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradReductionBackwardTest, MinBackward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto min_fn = std::make_shared<autograd::Min>(1, false);
+    auto result = min_fn->Apply({a});
+    auto grad = createTensor({2}, 1.0f);
+    auto grad_inputs = min_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradReductionBackwardTest, SumBackwardKeepDim) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto sum_fn = std::make_shared<autograd::Sum>(1, true);
+    auto result = sum_fn->Apply({a});
+    auto grad = createTensor({2, 1}, 1.0f);
+    auto grad_inputs = sum_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradReductionBackwardTest, MeanBackwardKeepDim) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto mean_fn = std::make_shared<autograd::Mean>(1, true);
+    auto result = mean_fn->Apply({a});
+    auto grad = createTensor({2, 1}, 1.0f);
+    auto grad_inputs = mean_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
diff --git a/tests/autograd/test_autograd_reduction_forward.cc b/tests/autograd/test_autograd_reduction_forward.cc
new file mode 100644
index 00000000..b4f8edb7
--- /dev/null
+++ b/tests/autograd/test_autograd_reduction_forward.cc
@@ -0,0 +1,54 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/reduction.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradReductionForwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradReductionForwardTest, SumForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto sum_fn = std::make_shared<autograd::Sum>(1, false);
+    auto result = sum_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradReductionForwardTest, MeanForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto mean_fn = std::make_shared<autograd::Mean>(1, false);
+    auto result = mean_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradReductionForwardTest, MaxForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto max_fn = std::make_shared<autograd::Max>(1, false);
+    auto result = max_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradReductionForwardTest, MinForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto min_fn = std::make_shared<autograd::Min>(1, false);
+    auto result = min_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradReductionForwardTest, SumKeepDim) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto sum_fn = std::make_shared<autograd::Sum>(1, true);
+    auto result = sum_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradReductionForwardTest, MeanKeepDim) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto mean_fn = std::make_shared<autograd::Mean>(1, true);
+    auto result = mean_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
diff --git a/tests/autograd/test_autograd_softmax_backward.cc b/tests/autograd/test_autograd_softmax_backward.cc
new file mode 100644
index 00000000..6d3f02a4
--- /dev/null
+++ b/tests/autograd/test_autograd_softmax_backward.cc
@@ -0,0 +1,30 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/softmax.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradSoftmaxBackwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradSoftmaxBackwardTest, SoftmaxBackward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto softmax_fn = std::make_shared<autograd::Softmax>(1);
+    auto result = softmax_fn->Apply({a});
+    auto grad = createTensor({2, 3}, 1.0f);
+    auto grad_inputs = softmax_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
+
+TEST_F(AutogradSoftmaxBackwardTest, SoftmaxBackwardDim0) {
+    auto a = createTensor({4, 3}, 1.0f);
+    auto softmax_fn = std::make_shared<autograd::Softmax>(0);
+    auto result = softmax_fn->Apply({a});
+    auto grad = createTensor({4, 3}, 1.0f);
+    auto grad_inputs = softmax_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
diff --git a/tests/autograd/test_autograd_softmax_forward.cc b/tests/autograd/test_autograd_softmax_forward.cc
new file mode 100644
index 00000000..c3d196f1
--- /dev/null
+++ b/tests/autograd/test_autograd_softmax_forward.cc
@@ -0,0 +1,36 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/softmax.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradSoftmaxForwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradSoftmaxForwardTest, SoftmaxForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto softmax_fn = std::make_shared<autograd::Softmax>(1);
+    auto result = softmax_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3}));
+}
+
+TEST_F(AutogradSoftmaxForwardTest, SoftmaxDim0) {
+    auto a = createTensor({4, 3}, 1.0f);
+    auto softmax_fn = std::make_shared<autograd::Softmax>(0);
+    auto result = softmax_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{4, 3}));
+}
+
+TEST_F(AutogradSoftmaxForwardTest, SoftmaxLastDim) {
+    auto a = createTensor({2, 3, 4}, 1.0f);
+    auto softmax_fn = std::make_shared<autograd::Softmax>(2);
+    auto result = softmax_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3, 4}));
+}
diff --git a/tests/autograd/test_autograd_transform_backward.cc b/tests/autograd/test_autograd_transform_backward.cc
new file mode 100644
index 00000000..1613f1a2
--- /dev/null
+++ b/tests/autograd/test_autograd_transform_backward.cc
@@ -0,0 +1,21 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/transform.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradTransformBackwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradTransformBackwardTest, TransposeBackward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto transpose_fn = std::make_shared<autograd::Transpose>(0, 1);
+    auto result = transpose_fn->Apply({a});
+    auto grad = createTensor({3, 2}, 1.0f);
+    auto grad_inputs = transpose_fn->Backward({grad});
+    EXPECT_EQ(grad_inputs.size(), 1);
+}
diff --git a/tests/autograd/test_autograd_transform_forward.cc b/tests/autograd/test_autograd_transform_forward.cc
new file mode 100644
index 00000000..67b20adb
--- /dev/null
+++ b/tests/autograd/test_autograd_transform_forward.cc
@@ -0,0 +1,70 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/autograd/transform.h"
+#include "infini_train/include/autograd/misc.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class AutogradTransformForwardTest : public infini_train::test::AutogradTestBase {};
+
+TEST_F(AutogradTransformForwardTest, TransposeForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto transpose_fn = std::make_shared<autograd::Transpose>(0, 1);
+    auto result = transpose_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{3, 2}));
+}
+
+TEST_F(AutogradTransformForwardTest, SliceForward) {
+    auto a = createTensor({4, 4}, 1.0f);
+    auto slice_fn = std::make_shared<autograd::Slice>(
+        std::vector<int64_t>{1, 1},
+        std::vector<int64_t>{3, 3},
+        std::vector<int64_t>{1, 1});
+    auto result = slice_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradTransformForwardTest, SplitForward) {
+    auto a = createTensor({4, 4}, 1.0f);
+    auto split_fn = std::make_shared<autograd::Split>(2, 0);
+    auto result = split_fn->Apply({a});
+    EXPECT_EQ(result.size(), 2);
+}
+
+TEST_F(AutogradTransformForwardTest, ConcatForward) {
+    auto a = createTensor({2, 2}, 1.0f);
+    auto b = createTensor({2, 2}, 2.0f);
+    auto concat_fn = std::make_shared<autograd::Concat>(0);
+    auto result = concat_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{4, 2}));
+}
+
+TEST_F(AutogradTransformForwardTest, StackForward) {
+    auto a = createTensor({2, 3}, 1.0f);
+    auto b = createTensor({2, 3}, 2.0f);
+    auto stack_fn = std::make_shared<autograd::Stack>(0);
+    auto result = stack_fn->Apply({a, b});
+    EXPECT_EQ(result.size(), 1);
+    EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 2, 3}));
+}
+
+TEST_F(AutogradTransformForwardTest, TrilForward) {
+    auto a = createTensor({3, 3}, 1.0f);
+    auto tril_fn = std::make_shared<autograd::Tril>(0);
+    auto result = tril_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
+
+TEST_F(AutogradTransformForwardTest, TriuForward) {
+    auto a = createTensor({3, 3}, 1.0f);
+    auto triu_fn = std::make_shared<autograd::Triu>(0);
+    auto result = triu_fn->Apply({a});
+    EXPECT_EQ(result.size(), 1);
+}
diff --git a/tests/common/CMakeLists.txt b/tests/common/CMakeLists.txt
new file mode 100644
index 00000000..3960d474
--- /dev/null
+++ b/tests/common/CMakeLists.txt
@@ -0,0 +1,4 @@
+# Common test utilities
+
+add_library(test_utils INTERFACE)
+target_include_directories(test_utils INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/tests/common/test_macros.cmake b/tests/common/test_macros.cmake
new file mode 100644
index 00000000..184083c0
--- /dev/null
+++ b/tests/common/test_macros.cmake
@@ -0,0 +1,97 @@
+# ============================================================================
+# InfiniTrain test macros
+# ============================================================================
+# Provides a unified interface for configuring tests, lowering the cost of
+# adding new ones.
+#
+# Usage:
+#   1. include this file from tests/CMakeLists.txt
+#   2. register tests with the infini_train_add_test macro
+#
+# Example:
+#   infini_train_add_test(
+#       test_tensor_create
+#       SOURCES test_tensor_create.cc
+#       LABELS cpu cuda
+#   )
+# ============================================================================
+
+include_guard(GLOBAL)
+
+# Directory containing test_macros.cmake (tests/common/)
+set(TEST_MACROS_DIR "${CMAKE_CURRENT_LIST_DIR}")
+
+# -----------------------------------------------------------------------------
+# Load the GoogleTest module (provides gtest_discover_tests)
+# -----------------------------------------------------------------------------
+include(GoogleTest)
+
+# -----------------------------------------------------------------------------
+# infini_train_add_test - test registration macro
+# -----------------------------------------------------------------------------
+# What it does:
+#   1. Creates the test executable
+#   2. Configures compile options, linked libraries, and include paths
+#   3. Discovers test cases automatically via gtest_discover_tests
+#   4. Applies the test labels
+#
+# Arguments:
+#   SOURCES: list of source files (required)
+#   LABELS:  test labels such as "cpu" "cuda" "distributed" (optional; defaults to "cpu")
+#
+# Examples:
+#   # Simple test (one line)
+#   infini_train_add_test(test_example SOURCES test_example.cc LABELS cpu)
+#
+#   # Multi-label test
+#   infini_train_add_test(test_cuda_example SOURCES test_cuda.cc LABELS cuda distributed)
+# -----------------------------------------------------------------------------
+macro(infini_train_add_test)
+    cmake_parse_arguments(ARG "" "TEST_NAME" "SOURCES;LABELS" ${ARGN})
+
+    if(NOT ARG_TEST_NAME)
+        set(ARG_TEST_NAME ${ARG_UNPARSED_ARGUMENTS})
+    endif()
+
+    if(NOT ARG_SOURCES)
+        message(FATAL_ERROR "infini_train_add_test: TEST_NAME and SOURCES are required")
+    endif()
+
+    # 1. Create the executable
+    add_executable(${ARG_TEST_NAME} ${ARG_SOURCES})
+
+    # 2. Configure compile options (keep warnings from being promoted to
+    #    errors, so the tests also build in lenient environments)
+    target_compile_options(${ARG_TEST_NAME} PRIVATE -Wno-error)
+
+    # 3. Link Google Test
+    target_link_libraries(${ARG_TEST_NAME} PRIVATE
+        GTest::gtest
+        GTest::gtest_main
+    )
+
+    # 4. Add include paths
+    target_include_directories(${ARG_TEST_NAME} PRIVATE
+        ${TEST_MACROS_DIR}
+        ${glog_SOURCE_DIR}/src
+    )
+
+    # 5. Link the project libraries (whole-archive resolves static-library
+    #    symbol dependencies)
+    target_link_libraries(${ARG_TEST_NAME} PRIVATE
+        "-Wl,--whole-archive"
+        infini_train
+        infini_train_cpu_kernels
+        "-Wl,--no-whole-archive"
+    )
+
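+    # (kernels register themselves through static initializers, so without
+    # --whole-archive the linker would drop those otherwise-unreferenced objects)
+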
diff --git a/tests/common/test_utils.h b/tests/common/test_utils.h
new file mode 100644
index 00000000..409b720e
--- /dev/null
+++ b/tests/common/test_utils.h
@@ -0,0 +1,164 @@
+#pragma once
+
+#include <algorithm>
+#include <memory>
+
+#include <vector>
+#include <gtest/gtest.h>
+
+#if defined(USE_CUDA)
+# if defined(__has_include)
+#  if __has_include(<cuda_runtime.h>)
+#   include <cuda_runtime.h>
+#  else
+#   error "CUDA runtime headers are required when USE_CUDA=ON"
+#  endif
+# else
+#  include <cuda_runtime.h>
+# endif
+#endif
+
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/nn/parallel/global.h"
+
+namespace infini_train {
+namespace test {
+
+#ifdef USE_CUDA
+inline int GetCudaDeviceCount() {
+    int count = 0;
+    cudaError_t err = cudaGetDeviceCount(&count);
+    if (err != cudaSuccess) {
+        return 0;
+    }
+    return std::max(count, 0);
+}
+#else
+inline int GetCudaDeviceCount() {
+    return 0;
+}
+#endif
+
+inline bool HasCudaRuntime() {
+    return GetCudaDeviceCount() > 0;
+}
+
+inline bool HasNCCL() {
+#ifdef USE_NCCL
+    return true;
+#else
+    return false;
+#endif
+}
+
+inline bool HasDistributedSupport() {
+    return HasCudaRuntime() && HasNCCL() && GetCudaDeviceCount() >= 2;
+}
+
+#define REQUIRE_CUDA()                                                                                               \
+    do {                                                                                                             \
+        if (!infini_train::test::HasCudaRuntime()) {                                                                 \
+            GTEST_SKIP() << "requires CUDA support (found " << infini_train::test::GetCudaDeviceCount() << " GPUs)"; \
+        }                                                                                                            \
+    } while (0)
+
+#define REQUIRE_MIN_GPUS(n)                                                                                          \
+    do {                                                                                                             \
+        int available_gpus = infini_train::test::GetCudaDeviceCount();                                               \
+        if (available_gpus < (n)) {                                                                                  \
+            GTEST_SKIP() << "requires at least " << (n) << " GPUs (found " << available_gpus << ")";                 \
+        }                                                                                                            \
+    } while (0)
+
+#define REQUIRE_NCCL()                                                                                               \
+    do {                                                                                                             \
+        if (!infini_train::test::HasNCCL()) {                                                                        \
+            GTEST_SKIP() << "NCCL support is disabled (build with USE_NCCL=ON)";                                     \
+        }                                                                                                            \
+    } while (0)
+
+#define REQUIRE_DISTRIBUTED()                                                                                        \
+    do {                                                                                                             \
+        REQUIRE_NCCL();                                                                                              \
+        REQUIRE_MIN_GPUS(2);                                                                                         \
+    } while (0)
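These REQUIRE_* macros turn missing hardware into GTEST_SKIP() rather than failures. A minimal sketch of a test that relies on them (hypothetical test name; the same pattern the suites below follow):

```cpp
// Hypothetical example, not part of the suite: skips unless two CUDA devices exist.
TEST(RequireMacroSketch, NeedsTwoGpus) {
    REQUIRE_MIN_GPUS(2);
#if defined(USE_CUDA)
    EXPECT_GE(infini_train::test::GetCudaDeviceCount(), 2);
#endif
}
```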
+
+class InfiniTrainTest : public ::testing::Test {
+protected:
+    static void SetUpTestSuite() {
+        nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1);
+    }
+};
+
+class TensorTestBase : public InfiniTrainTest {
+protected:
+    std::vector<int64_t> default_shape_{2, 3, 4};
+    DataType default_dtype_{DataType::kFLOAT32};
+
+    std::shared_ptr<Tensor> createTensor(const std::vector<int64_t>& shape = {2, 3, 4},
+                                         DataType dtype = DataType::kFLOAT32,
+                                         bool requires_grad = false,
+                                         Device::DeviceType device = Device::DeviceType::kCPU,
+                                         int device_id = 0) {
+        auto tensor = std::make_shared<Tensor>(shape, dtype, Device(device, device_id));
+        tensor->set_requires_grad(requires_grad);
+        return tensor;
+    }
+
+    void fillTensor(std::shared_ptr<Tensor> tensor, float value) {
+        auto data = static_cast<float *>(tensor->DataPtr());
+        size_t size = 1;
+        for (auto dim : tensor->Dims()) size *= dim;
+        for (size_t i = 0; i < size; ++i) {
+            data[i] = value + static_cast<float>(i);
+        }
+    }
+};
+
+class CPUTensorTest : public TensorTestBase {};
+
+#ifdef USE_CUDA
+class CUDATensorTest : public TensorTestBase {
+protected:
+    CUDATensorTest() {
+        default_shape_ = {2, 3, 4};
+        default_dtype_ = DataType::kFLOAT32;
+    }
+};
+#endif
+
+#ifdef USE_NCCL
+class DistributedTensorTest : public TensorTestBase {};
+#endif
+
+class AutogradTestBase : public InfiniTrainTest {
+protected:
+    std::shared_ptr<Tensor> createTensor(const std::vector<int64_t>& shape,
+                                         float value = 0.0f,
+                                         Device::DeviceType device = Device::DeviceType::kCPU,
+                                         int device_id = 0) {
+        auto tensor = std::make_shared<Tensor>(shape, DataType::kFLOAT32,
+                                               Device(device, device_id));
+        tensor->set_requires_grad(true);
+        auto data = static_cast<float *>(tensor->DataPtr());
+        size_t size = 1;
+        for (auto dim : shape) size *= dim;
+        for (size_t i = 0; i < size; ++i) {
+            data[i] = value + static_cast<float>(i);
+        }
+        return tensor;
+    }
+};
+
+class CPUAutogradTest : public AutogradTestBase {};
+
+#ifdef USE_CUDA
+class CUDAAutogradTest : public AutogradTestBase {};
+#endif
+
+#ifdef USE_NCCL
+class DistributedAutogradTest : public AutogradTestBase {};
+#endif
+
+} // namespace test
+} // namespace infini_train
diff --git a/tests/hook/CMakeLists.txt b/tests/hook/CMakeLists.txt
new file mode 100644
index 00000000..51c77ae0
--- /dev/null
+++ b/tests/hook/CMakeLists.txt
@@ -0,0 +1,14 @@
+# ============================================================================
+# Hook tests
+# ============================================================================
+# Refactored: configuration is simplified via the infini_train_add_test macro
+#
+# Adding a new test takes one line:
+#   infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu)
+# ============================================================================
+
+# -----------------------------------------------------------------------------
+# Hook tests
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_hook SOURCES test_hook.cc LABELS cpu cuda distributed)
+infini_train_add_test(test_precision_check SOURCES test_precision_check.cc LABELS cpu)
diff --git a/tests/hook/test_hook.cc b/tests/hook/test_hook.cc
new file mode 100644
index 00000000..7f876c5e
--- /dev/null
+++ b/tests/hook/test_hook.cc
@@ -0,0 +1,196 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <vector>
+
+#include "infini_train/include/autograd/elementwise.h"
+#include "infini_train/include/autograd/function.h"
+#include "infini_train/include/autograd/function_hook.h"
+#include "infini_train/include/common/hook.h"
+#include "infini_train/include/nn/modules/module.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/tensor.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+
+class HookTest : public ::testing::Test {
+protected:
+    static void SetUpTestSuite() {
+        nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1);
+    }
+};
+
+class TestModule : public nn::Module {
+public:
+    TestModule() : Module("TestModule") {}
+
+    std::vector<std::shared_ptr<Tensor>> Forward(const std::vector<std::shared_ptr<Tensor>> &inputs) override {
+        return inputs;
+    }
+};
+
+TEST_F(HookTest, BasicModuleHooks) {
+    auto x = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32);
+    x->set_requires_grad(true);
+
+    auto module = std::make_shared<TestModule>();
+
+    auto pre_hook = module->RegisterForwardPreHook(
+        [](nn::Module *mod, const std::vector<std::shared_ptr<Tensor>> &inputs) {});
+
+    auto fwd_hook = module->RegisterForwardPostHook(
+        [](nn::Module *mod, const std::vector<std::shared_ptr<Tensor>> &inputs,
+           const std::vector<std::shared_ptr<Tensor>> &outputs) {});
+
+    auto bwd_pre_hook = module->RegisterBackwardPreHook(
+        [](nn::Module *mod, const std::vector<std::shared_ptr<Tensor>> &grad_outputs) {});
+
+    auto bwd_post_hook = module->RegisterBackwardPostHook(
+        [](nn::Module *mod, const std::vector<std::shared_ptr<Tensor>> &grad_inputs,
+           const std::vector<std::shared_ptr<Tensor>> &grad_outputs) {});
+
+    std::vector<std::shared_ptr<Tensor>> inputs = {x};
+    auto outputs = (*module)(inputs);
+
+    EXPECT_EQ(outputs.size(), 1);
+}
+
+TEST_F(HookTest, HookRemove) {
+    auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 2}, DataType::kFLOAT32);
+    auto b = std::make_shared<Tensor>(std::vector<int64_t>{2, 2}, DataType::kFLOAT32);
+ a->set_requires_grad(true); + b->set_requires_grad(true); + + int hook1_count = 0; + int hook2_count = 0; + int hook3_count = 0; + + auto add_fn = std::make_shared(); + + auto handle1 = add_fn->RegisterForwardPreHook( + [&hook1_count](autograd::Function *, const std::vector> &) { + hook1_count++; + }); + + auto handle2 = add_fn->RegisterForwardPreHook( + [&hook2_count](autograd::Function *, const std::vector> &) { + hook2_count++; + }); + + auto handle3 = add_fn->RegisterForwardPreHook( + [&hook3_count](autograd::Function *, const std::vector> &) { + hook3_count++; + }); + + std::vector> inputs = {a, b}; + + add_fn->Apply(inputs); + EXPECT_EQ(hook1_count, 1); + EXPECT_EQ(hook2_count, 1); + EXPECT_EQ(hook3_count, 1); + + handle2->Remove(); + + add_fn->Apply(inputs); + EXPECT_EQ(hook1_count, 2); + EXPECT_EQ(hook2_count, 1); + EXPECT_EQ(hook3_count, 2); + + handle1->Remove(); + + add_fn->Apply(inputs); + EXPECT_EQ(hook1_count, 2); + EXPECT_EQ(hook2_count, 1); + EXPECT_EQ(hook3_count, 3); +} + +TEST_F(HookTest, BasicModuleHooksCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + x->set_requires_grad(true); + + auto module = std::make_shared(); + + auto pre_hook = module->RegisterForwardPreHook( + [](nn::Module *mod, const std::vector> &inputs) {}); + + std::vector> inputs = {x}; + auto outputs = (*module)(inputs); + + EXPECT_EQ(outputs.size(), 1); + EXPECT_TRUE(outputs[0]->IsCUDA()); +#endif +} + +TEST_F(HookTest, HookRemoveCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto a = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto b = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + b->set_requires_grad(true); + + int hook_count = 0; + auto add_fn = std::make_shared(); + + auto handle = add_fn->RegisterForwardPreHook( + [&hook_count](autograd::Function *, const std::vector> &) { + hook_count++; + }); + + std::vector> inputs = {a, b}; + add_fn->Apply(inputs); + + EXPECT_EQ(hook_count, 1); +#endif +} + +TEST_F(HookTest, DistributedModuleHooks) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + x->set_requires_grad(true); + + auto module = std::make_shared(); + + auto pre_hook = module->RegisterForwardPreHook( + [](nn::Module *mod, const std::vector> &inputs) {}); + + std::vector> inputs = {x}; + auto outputs = (*module)(inputs); + + EXPECT_EQ(outputs.size(), 1); + EXPECT_TRUE(outputs[0]->IsCUDA()); +#endif +} + +TEST_F(HookTest, DistributedHookRemove) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto a = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto b = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + b->set_requires_grad(true); + + int hook_count = 0; + auto add_fn = std::make_shared(); + + auto handle = add_fn->RegisterForwardPreHook( + [&hook_count](autograd::Function *, const std::vector> &) { + hook_count++; + }); + + std::vector> inputs = {a, b}; + add_fn->Apply(inputs); + + EXPECT_EQ(hook_count, 1); +#endif +} diff --git a/tests/hook/test_precision_check.cc b/tests/hook/test_precision_check.cc new file mode 100644 index 00000000..1970aa4f --- /dev/null +++ 
b/tests/hook/test_precision_check.cc
@@ -0,0 +1,76 @@
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "infini_train/include/nn/modules/module.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/tensor.h"
+#include "infini_train/include/utils/precision_check_config.h"
+#include "infini_train/include/utils/precision_checker.h"
+
+using namespace infini_train;
+
+class PrecisionCheckTest : public ::testing::Test {
+protected:
+    static void SetUpTestSuite() {
+        nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1);
+    }
+};
+
+class SimpleModel : public nn::Module {
+public:
+    SimpleModel() : Module("SimpleModel") {}
+
+    std::vector<std::shared_ptr<Tensor>> Forward(const std::vector<std::shared_ptr<Tensor>> &inputs) override {
+        auto x = inputs[0];
+        x->RequiresGrad();
+        auto y = x->Mul(x)->Mul(x);
+        return {y};
+    }
+};
+
+TEST_F(PrecisionCheckTest, SimpleFormat) {
+    auto x = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32);
+    x->Fill(2.0f);
+    x->RequiresGrad();
+
+    auto y = x->Mul(x);
+    auto loss = y->Sum(0, false)->Sum(0, false);
+    loss->Backward();
+
+    EXPECT_NE(x->DataPtr(), nullptr);
+}
+
+TEST_F(PrecisionCheckTest, ModuleForwardBackward) {
+    auto model = std::make_shared<SimpleModel>();
+
+    auto x = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32);
+    x->Fill(2.0f);
+    x->RequiresGrad();
+
+    std::vector<std::shared_ptr<Tensor>> inputs = {x};
+    auto outputs = (*model)(inputs);
+    auto loss = outputs[0]->Sum(0, false)->Sum(0, false);
+    loss->Backward();
+
+    EXPECT_TRUE(x->requires_grad());
+}
+
+TEST_F(PrecisionCheckTest, MultiIteration) {
+    auto model = std::make_shared<SimpleModel>();
+
+    for (int i = 0; i < 3; ++i) {
+        auto x = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32);
+        x->Fill(2.0f);
+        x->RequiresGrad();
+
+        std::vector<std::shared_ptr<Tensor>> inputs = {x};
+        auto outputs = (*model)(inputs);
+        auto loss = outputs[0]->Sum(0, false)->Sum(0, false);
+        loss->Backward();
+    }
+
+    SUCCEED();
+}
diff --git a/tests/lora/CMakeLists.txt b/tests/lora/CMakeLists.txt
new file mode 100644
index 00000000..27b47c64
--- /dev/null
+++ b/tests/lora/CMakeLists.txt
@@ -0,0 +1,7 @@
+# ============================================================================
+# LoRA tests
+# ============================================================================
+# Refactored: configuration is simplified via the infini_train_add_test macro
+# ============================================================================
+
+infini_train_add_test(test_lora SOURCES test_lora.cc LABELS cpu)
diff --git a/tests/lora/test_lora.cc b/tests/lora/test_lora.cc
new file mode 100644
index 00000000..19c24efd
--- /dev/null
+++ b/tests/lora/test_lora.cc
@@ -0,0 +1,331 @@
+#include <gtest/gtest.h>
+
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "infini_train/include/nn/lora/lora_config.h"
+#include "infini_train/include/nn/lora/lora_linear.h"
+#include "infini_train/include/nn/lora/lora_utils.h"
+#include "infini_train/include/nn/modules/container.h"
+#include "infini_train/include/nn/modules/linear.h"
+#include "infini_train/include/nn/modules/module.h"
+#include "infini_train/include/nn/parallel/global.h"
+#include "infini_train/include/tensor.h"
+#include "test_utils.h"
+
+using namespace infini_train;
+using namespace infini_train::nn::lora;
+
+class LoRATest : public infini_train::test::InfiniTrainTest {};
+
+TEST_F(LoRATest, LoRAConfigScaling) {
+    LoRAConfig config;
+    config.rank = 8;
+    config.alpha = 16.0f;
+
+    float expected_scaling = 16.0f / 8.0f;
+    EXPECT_EQ(config.Scaling(), expected_scaling);
+}
+
+TEST_F(LoRATest, LoRAConfigShouldApply) {
+    LoRAConfig config;
+    config.rank = 8;
+    config.alpha = 16.0f;
+
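+    // Note: target matching is suffix-style: a bare name like "c_attn" also matches
+    // qualified paths such as "transformer.h.0.attn.c_attn", as the expectations below show.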
EXPECT_TRUE(config.ShouldApplyLoRA("c_attn")); + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.attn.c_attn")); + EXPECT_TRUE(config.ShouldApplyLoRA("c_proj")); + EXPECT_FALSE(config.ShouldApplyLoRA("c_fc")); + EXPECT_FALSE(config.ShouldApplyLoRA("random_layer")); +} + +TEST_F(LoRATest, LoRALinearFromModel) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto *lora_linear = dynamic_cast(model.get()); + ASSERT_NE(lora_linear, nullptr); + + EXPECT_EQ(lora_linear->in_features(), 64); + EXPECT_EQ(lora_linear->out_features(), 128); + EXPECT_EQ(lora_linear->rank(), 4); + + auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName); + auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName); + auto weight = lora_linear->parameter(nn::Linear::kParamWeightName); + + EXPECT_EQ(lora_A->Dims()[0], config.rank); + EXPECT_EQ(lora_A->Dims()[1], 64); + EXPECT_EQ(lora_B->Dims()[0], 128); + EXPECT_EQ(lora_B->Dims()[1], config.rank); + + EXPECT_FALSE(weight->requires_grad()); + EXPECT_TRUE(lora_A->requires_grad()); + EXPECT_TRUE(lora_B->requires_grad()); + + auto params = lora_linear->LoRAParameters(); + EXPECT_EQ(params.size(), 2); +} + +TEST_F(LoRATest, LoRALinearForward) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto input = std::make_shared(std::vector{2, 10, 64}, DataType::kFLOAT32); + + auto output = (*model)({input})[0]; + + EXPECT_EQ(output->Dims().size(), 3); + EXPECT_EQ(output->Dims()[0], 2); + EXPECT_EQ(output->Dims()[1], 10); + EXPECT_EQ(output->Dims()[2], 128); +} + +TEST_F(LoRATest, LoRALinearMerge) { + auto base_linear = std::make_shared(32, 64, /*bias=*/false); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto *lora_linear = dynamic_cast(model.get()); + ASSERT_NE(lora_linear, nullptr); + + auto input = std::make_shared(std::vector{2, 5, 32}, DataType::kFLOAT32); + input->EigenMatrix().setRandom(); + + auto output_before = (*model)({input})[0]; + float output_before_sum = output_before->EigenMatrix().sum(); + + EXPECT_FALSE(lora_linear->IsMerged()); + MergeLoRAWeights(model); + EXPECT_TRUE(lora_linear->IsMerged()); + + auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName); + auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName); + EXPECT_FALSE(lora_A->requires_grad()); + EXPECT_FALSE(lora_B->requires_grad()); + + auto output_merged = (*model)({input})[0]; + float output_merged_sum = output_merged->EigenMatrix().sum(); + EXPECT_NEAR(std::abs(output_before_sum - output_merged_sum), 0.0f, 1e-3); + + UnmergeLoRAWeights(model); + EXPECT_FALSE(lora_linear->IsMerged()); + EXPECT_TRUE(lora_A->requires_grad()); + EXPECT_TRUE(lora_B->requires_grad()); + + auto output_unmerged = (*model)({input})[0]; + EXPECT_EQ(output_before->Dims(), output_unmerged->Dims()); +} + +TEST_F(LoRATest, LoRAUtils) { + auto base_linear = std::make_shared(32, 64, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto lora_params = GetLoRAParameters(model); + EXPECT_EQ(lora_params.size(), 2); + + 
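+    // Expected counts: lora_A is (rank x in) = 4 x 32 and lora_B is (out x rank) = 64 x 4,
+    // so trainable = 4*32 + 64*4 = 384; the frozen base adds 64*32 weights + 64 bias = 2112,
+    // giving a total of 2496.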
int64_t trainable = CountTrainableParameters(model); + int64_t expected_trainable = config.rank * 32 + 64 * config.rank; + EXPECT_EQ(trainable, expected_trainable); + + int64_t total = CountTotalParameters(model); + int64_t expected_total = 64 * 32 + 64 + config.rank * 32 + 64 * config.rank; + EXPECT_EQ(total, expected_total); +} + +TEST_F(LoRATest, ParseLoRATargetModules) { + auto modules = ParseLoRATargetModules("c_attn"); + EXPECT_EQ(modules.size(), 1); + EXPECT_TRUE(modules.count("c_attn")); + + modules = ParseLoRATargetModules("c_attn,c_proj,c_fc"); + EXPECT_EQ(modules.size(), 3); + EXPECT_TRUE(modules.count("c_attn")); + EXPECT_TRUE(modules.count("c_proj")); + EXPECT_TRUE(modules.count("c_fc")); + + modules = ParseLoRATargetModules("c_attn, c_proj , c_fc"); + EXPECT_EQ(modules.size(), 3); + + modules = ParseLoRATargetModules("c_attn,,c_proj"); + EXPECT_EQ(modules.size(), 2); +} + +TEST_F(LoRATest, ShouldApplyLoRAEdgeCases) { + { + LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,attn.c_proj")}; + EXPECT_TRUE(config.ShouldApplyLoRA("attn.c_proj")); + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj")); + EXPECT_FALSE(config.ShouldApplyLoRA("mlp.c_proj")); + } + + { + LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,mlp.c_proj")}; + EXPECT_FALSE(config.ShouldApplyLoRA("attn.c_proj")); + EXPECT_TRUE(config.ShouldApplyLoRA("mlp.c_proj")); + } + + { + LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,c_proj")}; + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj")); + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.mlp.c_proj")); + } +} + +TEST_F(LoRATest, FreezeUnfreeze) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto all_params = model->Parameters(); + + int64_t total_trainable = 0; + for (const auto &p : all_params) { + if (p->requires_grad()) { + total_trainable += p->NumElements(); + } + } + int64_t expected = config.rank * 64 + 128 * config.rank; + EXPECT_EQ(total_trainable, expected); + + FreezeBaseModel(model); + + int64_t after_freeze = 0; + for (const auto &p : all_params) { + if (p->requires_grad()) { + after_freeze += p->NumElements(); + } + } + EXPECT_EQ(after_freeze, expected); + + UnfreezeModel(model); + int64_t after_unfreeze = 0; + for (const auto &p : all_params) { + if (p->requires_grad()) { + after_unfreeze += p->NumElements(); + } + } + int64_t expected_unfreeze = 64 * 128 + 128 + config.rank * 64 + 128 * config.rank; + EXPECT_EQ(after_unfreeze, expected_unfreeze); +} + +TEST_F(LoRATest, LoRAStateDict) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto state_dict = model->StateDict(); + + EXPECT_TRUE(state_dict.count("weight")); + EXPECT_TRUE(state_dict.count("bias")); + EXPECT_TRUE(state_dict.count("lora_A")); + EXPECT_TRUE(state_dict.count("lora_B")); + + EXPECT_TRUE(state_dict.at("lora_A")->requires_grad()); + EXPECT_TRUE(state_dict.at("lora_B")->requires_grad()); + EXPECT_FALSE(state_dict.at("weight")->requires_grad()); + + EXPECT_EQ(state_dict.at("lora_A")->Dims()[0], config.rank); + EXPECT_EQ(state_dict.at("lora_A")->Dims()[1], 64); + EXPECT_EQ(state_dict.at("lora_B")->Dims()[0], 128); + 
+    EXPECT_EQ(state_dict.at("lora_B")->Dims()[1], config.rank);
+}
+
+TEST_F(LoRATest, GetLoRAModel) {
+    auto base_linear = std::make_shared<nn::Linear>(64, 128, /*bias=*/true);
+
+    LoRAConfig config;
+    config.rank = 4;
+    config.alpha = 8.0f;
+    config.target_modules = {"Linear"};
+
+    auto model = GetLoRAModel(base_linear, config);
+
+    EXPECT_NE(model, nullptr);
+
+    auto lora_params = GetLoRAParameters(model);
+    EXPECT_EQ(lora_params.size(), 2);
+
+    int64_t total_elements = 0;
+    for (const auto &t : lora_params) {
+        total_elements += t->NumElements();
+    }
+    int64_t expected_elements = config.rank * 64 + 128 * config.rank;
+    EXPECT_EQ(total_elements, expected_elements);
+
+    MergeLoRAWeights(model);
+    auto *lora_mod = dynamic_cast<LoRALinear *>(model.get());
+    EXPECT_NE(lora_mod, nullptr);
+    EXPECT_FALSE(lora_mod->LoRAParameters()[0]->requires_grad());
+
+    UnmergeLoRAWeights(model);
+    EXPECT_TRUE(lora_mod->LoRAParameters()[0]->requires_grad());
+}
+
+TEST_F(LoRATest, MergeAndUnload) {
+    auto base_linear = std::make_shared<nn::Linear>(64, 128, /*bias=*/true);
+    LoRAConfig config;
+    config.rank = 4;
+    config.alpha = 8.0f;
+    config.target_modules = {"Linear"};
+    auto model = GetLoRAModel(base_linear, config);
+
+    EXPECT_NE(dynamic_cast<LoRALinear *>(model.get()), nullptr);
+
+    auto input = std::make_shared<Tensor>(std::vector<int64_t>{2, 5, 64}, DataType::kFLOAT32);
+    input->EigenMatrix().setRandom();
+    auto output_before = (*model)({input})[0];
+    float output_before_sum = output_before->EigenMatrix().sum();
+
+    auto unloaded_model = MergeAndUnload(model);
+    EXPECT_NE(unloaded_model, nullptr);
+    EXPECT_EQ(dynamic_cast<LoRALinear *>(unloaded_model.get()), nullptr);
+
+    auto state_dict = unloaded_model->StateDict();
+    for (const auto &[name, param] : state_dict) {
+        EXPECT_EQ(name.find("lora_A"), std::string::npos);
+        EXPECT_EQ(name.find("lora_B"), std::string::npos);
+    }
+
+    auto output_after = (*unloaded_model)({input})[0];
+    float output_after_sum = output_after->EigenMatrix().sum();
+    EXPECT_NEAR(std::abs(output_before_sum - output_after_sum), 0.0f, 1e-3);
+
+    for (const auto &param : unloaded_model->Parameters()) {
+        EXPECT_TRUE(param->requires_grad());
+    }
+}
diff --git a/tests/optimizer/CMakeLists.txt b/tests/optimizer/CMakeLists.txt
new file mode 100644
index 00000000..3dc00354
--- /dev/null
+++ b/tests/optimizer/CMakeLists.txt
@@ -0,0 +1,22 @@
+# ============================================================================
+# Optimizer tests
+# ============================================================================
+# Refactored: configuration is simplified via the infini_train_add_test macro
+#
+# Adding a new test takes one line:
+#   infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu)
+# ============================================================================
+
+# -----------------------------------------------------------------------------
+# Optimizer tests
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_optimizer_creation SOURCES test_optimizer_creation.cc LABELS cpu)
+infini_train_add_test(test_optimizer_step SOURCES test_optimizer_step.cc LABELS cpu)
+infini_train_add_test(test_optimizer_cuda SOURCES test_optimizer_cuda.cc LABELS cuda)
+infini_train_add_test(test_optimizer_distributed SOURCES test_optimizer_distributed.cc LABELS cuda distributed)
+
+# -----------------------------------------------------------------------------
+# Legacy combined tests
+# Note: with gtest_discover_tests, every TEST_F is discovered automatically
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_optimizer_legacy SOURCES test_optimizer.cc
LABELS cpu cuda distributed) diff --git a/tests/optimizer/test_optimizer.cc b/tests/optimizer/test_optimizer.cc new file mode 100644 index 00000000..6baa34e5 --- /dev/null +++ b/tests/optimizer/test_optimizer.cc @@ -0,0 +1,183 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class OptimizerTestBase : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } +}; + +class OptimizerCreationTest : public OptimizerTestBase {}; +class OptimizerGradTest : public OptimizerTestBase {}; +class OptimizerCudaTest : public OptimizerTestBase {}; +class OptimizerDistributedTest : public OptimizerTestBase {}; + +TEST_F(OptimizerCreationTest, SGDCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, AdamCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerGradTest, ZeroGrad) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); +} + +TEST_F(OptimizerCreationTest, SGDMultiParams) { + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); +} + +TEST_F(OptimizerCudaTest, SGDCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCudaTest, AdamCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCudaTest, ZeroGradCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCudaTest, SGDMultiParamsCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = 
std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedSGD) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedAdam) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedZeroGrad) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); +#endif +} diff --git a/tests/optimizer/test_optimizer_creation.cc b/tests/optimizer/test_optimizer_creation.cc new file mode 100644 index 00000000..fdb69d70 --- /dev/null +++ b/tests/optimizer/test_optimizer_creation.cc @@ -0,0 +1,82 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerCreationTest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerCreationTest, SGDCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, AdamCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, SGDWithMomentum) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, SGDMultiParams) { + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); +} + 
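These creation tests (and the step tests later in this patch) assert only that construction, ZeroGrad(), and Step() run. For reference, the numerics a value-level SGD check would pin down are simply w ← w − lr·g per element; a self-contained sketch of that arithmetic, with no framework calls:

```cpp
#include <cassert>

// Plain vanilla SGD update over a raw buffer: w[i] -= lr * g[i].
void SgdStep(float *w, const float *g, int n, float lr) {
    for (int i = 0; i < n; ++i) {
        w[i] -= lr * g[i];
    }
}

int main() {
    float w[2] = {1.0f, 1.0f};
    float g[2] = {0.5f, -2.0f};
    SgdStep(w, g, 2, 0.01f);
    assert(w[0] == 1.0f - 0.01f * 0.5f); // 0.995
    assert(w[1] == 1.0f + 0.01f * 2.0f); // 1.02
    return 0;
}
```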
+TEST_F(OptimizerCreationTest, AdamMultiParams) { + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.001); + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, SGDWithWeightDecay) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} diff --git a/tests/optimizer/test_optimizer_cuda.cc b/tests/optimizer/test_optimizer_cuda.cc new file mode 100644 index 00000000..b56ace2e --- /dev/null +++ b/tests/optimizer/test_optimizer_cuda.cc @@ -0,0 +1,93 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerCUDATest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerCUDATest, SGDCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCUDATest, AdamCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCUDATest, ZeroGradCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCUDATest, SGDMultiParamsCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); +#endif +} + +TEST_F(OptimizerCUDATest, AdamStepCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + auto* data = static_cast(param->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + optimizer->ZeroGrad(); + optimizer->Step(); + EXPECT_TRUE(param->IsCUDA()); +#endif +} diff --git a/tests/optimizer/test_optimizer_distributed.cc b/tests/optimizer/test_optimizer_distributed.cc new file mode 100644 index 00000000..f4f092bc --- /dev/null +++ 
b/tests/optimizer/test_optimizer_distributed.cc @@ -0,0 +1,75 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerDistributedTest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerDistributedTest, DistributedSGD) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedAdam) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedZeroGrad) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedMultiParams) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + std::vector> params; + for (int i = 0; i < 2; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); + optimizer->Step(); +#endif +} diff --git a/tests/optimizer/test_optimizer_step.cc b/tests/optimizer/test_optimizer_step.cc new file mode 100644 index 00000000..d5407998 --- /dev/null +++ b/tests/optimizer/test_optimizer_step.cc @@ -0,0 +1,62 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerStepTest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerStepTest, SGDStep) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + auto* data = static_cast(param->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); + optimizer->Step(); +} + +TEST_F(OptimizerStepTest, AdamStep) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + auto* data = static_cast(param->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + optimizer->ZeroGrad(); + optimizer->Step(); +} + +TEST_F(OptimizerStepTest, ZeroGrad) 
{
+    auto param = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
+                                          Device(Device::DeviceType::kCPU, 0));
+    param->set_requires_grad(true);
+
+    std::vector<std::shared_ptr<Tensor>> params = {param};
+    auto optimizer = std::make_shared<optimizers::SGD>(params, 0.01);
+
+    optimizer->ZeroGrad();
+}
+
+TEST_F(OptimizerStepTest, ZeroGradWithNone) {
+    auto param = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
+                                          Device(Device::DeviceType::kCPU, 0));
+    param->set_requires_grad(true);
+
+    std::vector<std::shared_ptr<Tensor>> params = {param};
+    auto optimizer = std::make_shared<optimizers::SGD>(params, 0.01);
+
+    optimizer->ZeroGrad(false);
+}
diff --git a/tests/slow/CMakeLists.txt b/tests/slow/CMakeLists.txt
new file mode 100644
index 00000000..dd76f187
--- /dev/null
+++ b/tests/slow/CMakeLists.txt
@@ -0,0 +1,13 @@
+# ============================================================================
+# Slow tests
+# ============================================================================
+# Refactored: configuration is simplified via the infini_train_add_test macro
+#
+# Adding a new test takes one line:
+#   infini_train_add_test(test_name SOURCES test_name.cc LABELS slow cpu)
+# ============================================================================
+
+# -----------------------------------------------------------------------------
+# Slow tests (labels include slow)
+# -----------------------------------------------------------------------------
+infini_train_add_test(test_slow SOURCES test_slow.cc LABELS slow cpu cuda distributed)
diff --git a/tests/slow/test_slow.cc b/tests/slow/test_slow.cc
new file mode 100644
index 00000000..a3c9628e
--- /dev/null
+++ b/tests/slow/test_slow.cc
@@ -0,0 +1,28 @@
+#include <chrono>
+#include <thread>
+
+#include <gtest/gtest.h>
+
+#include "test_utils.h"
+
+using namespace infini_train;
+
+TEST(SlowTest, Cpu) {
+    std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    EXPECT_TRUE(true);
+}
+
+TEST(SlowTest, Cuda) {
+    REQUIRE_CUDA();
+#if defined(USE_CUDA)
+    auto count = infini_train::test::GetCudaDeviceCount();
+    EXPECT_GT(count, 0);
+#endif
+}
+
+TEST(SlowTest, Distributed) {
+    REQUIRE_DISTRIBUTED();
+#if defined(USE_CUDA) && defined(USE_NCCL)
+    EXPECT_GE(infini_train::test::GetCudaDeviceCount(), 2);
+#endif
+}
diff --git a/tests/tensor/CMakeLists.txt b/tests/tensor/CMakeLists.txt
new file mode 100644
index 00000000..3df1708e
--- /dev/null
+++ b/tests/tensor/CMakeLists.txt
@@ -0,0 +1,126 @@
+# Tensor tests
+
+# test_tensor_create
+add_executable(test_tensor_create
+    test_tensor_create.cc
+)
+target_compile_options(test_tensor_create PRIVATE -Wno-error)
+target_link_libraries(test_tensor_create
+    PRIVATE
+    GTest::gtest
+    GTest::gtest_main
+)
+target_include_directories(test_tensor_create PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/../common
+    ${glog_SOURCE_DIR}/src
+)
+target_link_libraries(test_tensor_create PRIVATE
+    "-Wl,--whole-archive"
+    infini_train
+    infini_train_cpu_kernels
+    "-Wl,--no-whole-archive"
+)
+add_test(NAME tensor_create COMMAND test_tensor_create)
+set_tests_properties(tensor_create PROPERTIES LABELS "cpu;cuda")
+
+# test_tensor_copy
+add_executable(test_tensor_copy
+    test_tensor_copy.cc
+)
+target_compile_options(test_tensor_copy PRIVATE -Wno-error)
+target_link_libraries(test_tensor_copy
+    PRIVATE
+    GTest::gtest
+    GTest::gtest_main
+)
+target_include_directories(test_tensor_copy PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/../common
+    ${glog_SOURCE_DIR}/src
+)
+target_link_libraries(test_tensor_copy PRIVATE
+    "-Wl,--whole-archive"
+    infini_train
+    infini_train_cpu_kernels
+    "-Wl,--no-whole-archive"
+)
+add_test(NAME tensor_copy COMMAND test_tensor_copy)
+set_tests_properties(tensor_copy PROPERTIES LABELS
"cpu;cuda") + +# test_tensor_delete +add_executable(test_tensor_delete + test_tensor_delete.cc +) +target_compile_options(test_tensor_delete PRIVATE -Wno-error) +target_link_libraries(test_tensor_delete + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor_delete PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) +target_link_libraries(test_tensor_delete PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) +add_test(NAME tensor_delete COMMAND test_tensor_delete) +set_tests_properties(tensor_delete PROPERTIES LABELS "cpu") + +# test_tensor_op (keep original for backward compatibility) +add_executable(test_tensor_op + test_tensor.cc +) +target_compile_options(test_tensor_op PRIVATE -Wno-error) +target_link_libraries(test_tensor_op + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor_op PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) +target_link_libraries(test_tensor_op PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) +add_test(NAME tensor_ops COMMAND test_tensor_op --gtest_filter=TensorOpTest.*) +set_tests_properties(tensor_ops PROPERTIES LABELS "cpu;cuda") + +# test_tensor_distributed (keep original for backward compatibility) +add_executable(test_tensor_dist + test_tensor.cc +) +target_compile_options(test_tensor_dist PRIVATE -Wno-error) +target_link_libraries(test_tensor_dist + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor_dist PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) +target_link_libraries(test_tensor_dist PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) +add_test(NAME tensor_distributed COMMAND test_tensor_dist --gtest_filter=TensorDistributedTest.*) +set_tests_properties(tensor_distributed PROPERTIES LABELS "cuda;distributed") + +# Convenience aggregate target so `cmake --build ... 
--target test_tensor` works +add_custom_target(test_tensor + DEPENDS + test_tensor_create + test_tensor_copy + test_tensor_delete + test_tensor_op + test_tensor_dist +) diff --git a/tests/tensor/test_tensor.cc b/tests/tensor/test_tensor.cc new file mode 100644 index 00000000..23eb940c --- /dev/null +++ b/tests/tensor/test_tensor.cc @@ -0,0 +1,255 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorTestBase : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } + + static size_t Numel(const std::shared_ptr& tensor) { + size_t n = 1; + for (auto dim : tensor->Dims()) { + n *= static_cast(dim); + } + return n; + } + + static void FillSequential(const std::shared_ptr& tensor, float start = 0.0f) { + auto* data = static_cast(tensor->DataPtr()); + auto n = Numel(tensor); + for (size_t i = 0; i < n; ++i) { + data[i] = start + static_cast(i); + } + } +}; + +class TensorCreateTest : public TensorTestBase {}; +class TensorCopyTest : public TensorTestBase {}; +class TensorDeleteTest : public TensorTestBase {}; +class TensorOpTest : public TensorTestBase {}; +class TensorDistributedTest : public TensorTestBase {}; + +TEST_F(TensorCreateTest, CreatesCpuTensorWithShapeAndType) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); +} + +TEST_F(TensorCreateTest, TracksRequiresGrad) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->requires_grad()); +} + +TEST_F(TensorCreateTest, ProvidesDataPointer) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +} + +TEST_F(TensorCreateTest, SupportsMultipleShapes) { + std::vector> shapes = { + {2, 3}, + {4, 5, 6}, + {10}, + {1, 1, 1, 1} + }; + + for (const auto& shape : shapes) { + auto tensor = std::make_shared(shape, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dims(), shape); + } +} + +TEST_F(TensorCreateTest, SupportsMultipleDtypes) { + std::vector dtypes = { + DataType::kFLOAT32, + DataType::kBFLOAT16, + }; + + for (const auto& dtype : dtypes) { + auto tensor = std::make_shared(std::vector{2, 3}, dtype, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dtype(), dtype); + } +} + +TEST_F(TensorCreateTest, CreatesTensorOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); +#endif +} + +TEST_F(TensorCreateTest, TracksRequiresGradOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->requires_grad()); +#endif +} + +TEST_F(TensorCreateTest, 
ProvidesDataPointerOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +#endif +} + +TEST_F(TensorCopyTest, CopiesCPUToCPU) { + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(source, 1.0f); + + target->CopyFrom(source); + + auto* target_data = static_cast(target->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(target_data[i], 1.0f + static_cast(i)); + } +} + +TEST_F(TensorCopyTest, CopiesCPUToCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto cpu_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto cuda_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + + FillSequential(cpu_tensor, 0.0f); + cuda_tensor->CopyFrom(cpu_tensor); + + EXPECT_TRUE(cuda_tensor->IsCUDA()); +#endif +} + +TEST_F(TensorCopyTest, CopiesCUDAtoCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + FillSequential(source, 2.0f); + + target->CopyFrom(source); + + EXPECT_TRUE(target->IsCUDA()); +#endif +} + +TEST_F(TensorOpTest, MatmulCUDAAllocatesOutputs) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto b = std::make_shared(std::vector{3, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto c = std::make_shared(std::vector{2, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(a->DataPtr(), nullptr); + EXPECT_NE(b->DataPtr(), nullptr); + EXPECT_NE(c->DataPtr(), nullptr); + EXPECT_TRUE(a->IsCUDA()); + EXPECT_TRUE(b->IsCUDA()); + EXPECT_TRUE(c->IsCUDA()); +#endif +} + +TEST_F(TensorDeleteTest, ReleasesResourcesOnReset) { + std::weak_ptr weak_tensor; + { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + tensor->set_requires_grad(true); + weak_tensor = tensor; + } + EXPECT_TRUE(weak_tensor.expired()); +} + +TEST_F(TensorDeleteTest, MoveTransferKeepsData) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(tensor, 5.0f); + + auto moved = std::move(tensor); + EXPECT_EQ(tensor, nullptr); + ASSERT_NE(moved, nullptr); + + auto* data = static_cast(moved->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(data[i], 5.0f + static_cast(i)); + } +} + +TEST_F(TensorDistributedTest, AllReduce) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + + auto* data = static_cast(tensor->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_TRUE(tensor->requires_grad()); +#endif +} + +TEST_F(TensorDistributedTest, AllGather) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); +#if defined(USE_CUDA) && 
defined(USE_NCCL) + auto tensor = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_EQ(tensor->Dims(), (std::vector{4, 4})); +#endif +} + +TEST_F(TensorDistributedTest, ReduceScatter) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto tensor = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 8})); +#endif +} diff --git a/tests/tensor/test_tensor_copy.cc b/tests/tensor/test_tensor_copy.cc new file mode 100644 index 00000000..452062c4 --- /dev/null +++ b/tests/tensor/test_tensor_copy.cc @@ -0,0 +1,123 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorCopyTest : public infini_train::test::TensorTestBase {}; + +static void FillSequential(const std::shared_ptr& tensor, float start = 0.0f) { + auto* data = static_cast(tensor->DataPtr()); + size_t n = 1; + for (auto dim : tensor->Dims()) { + n *= static_cast(dim); + } + for (size_t i = 0; i < n; ++i) { + data[i] = start + static_cast(i); + } +} + +TEST_F(TensorCopyTest, CopiesCPUToCPU) { + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(source, 1.0f); + + target->CopyFrom(source); + + auto* target_data = static_cast(target->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(target_data[i], 1.0f + static_cast(i)); + } +} + +TEST_F(TensorCopyTest, CopiesCPUToCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto cpu_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto cuda_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + + FillSequential(cpu_tensor, 0.0f); + cuda_tensor->CopyFrom(cpu_tensor); + + EXPECT_TRUE(cuda_tensor->IsCUDA()); +#endif +} + +TEST_F(TensorCopyTest, CopiesCUDAtoCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + FillSequential(source, 2.0f); + + target->CopyFrom(source); + + EXPECT_TRUE(target->IsCUDA()); +#endif +} + +TEST_F(TensorCopyTest, CopiesCUDAtoCPU) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto cuda_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto cpu_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + + FillSequential(cuda_tensor, 1.0f); + cpu_tensor->CopyFrom(cuda_tensor); + + EXPECT_FALSE(cpu_tensor->IsCUDA()); + EXPECT_TRUE(cpu_tensor->IsCPU()); +#endif +} + +TEST_F(TensorCopyTest, CopiesBetweenSameShape) { + auto source = std::make_shared(std::vector{4, 5, 6}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{4, 5, 6}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(source, 0.0f); 
+ + target->CopyFrom(source); + + EXPECT_EQ(source->Dims(), target->Dims()); +} + +TEST_F(TensorCopyTest, CopiesPreservesDataType) { + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + + EXPECT_EQ(source->Dtype(), target->Dtype()); + target->CopyFrom(source); + EXPECT_EQ(target->Dtype(), DataType::kFLOAT32); +} + +TEST_F(TensorCopyTest, CopiesWithDifferentDeviceId) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 1)); + FillSequential(source, 5.0f); + + target->CopyFrom(source); + + EXPECT_EQ(source->GetDevice().index(), 0); + EXPECT_EQ(target->GetDevice().index(), 1); +#endif +} diff --git a/tests/tensor/test_tensor_create.cc b/tests/tensor/test_tensor_create.cc new file mode 100644 index 00000000..c39246e3 --- /dev/null +++ b/tests/tensor/test_tensor_create.cc @@ -0,0 +1,94 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorCreateTest : public infini_train::test::TensorTestBase {}; + +TEST_F(TensorCreateTest, CreatesCpuTensorWithShapeAndType) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); +} + +TEST_F(TensorCreateTest, TracksRequiresGrad) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->requires_grad()); +} + +TEST_F(TensorCreateTest, ProvidesDataPointer) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +} + +TEST_F(TensorCreateTest, SupportsMultipleShapes) { + std::vector> shapes = { + {2, 3}, + {4, 5, 6}, + {10}, + {1, 1, 1, 1} + }; + + for (const auto& shape : shapes) { + auto tensor = std::make_shared(shape, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dims(), shape); + } +} + +TEST_F(TensorCreateTest, SupportsMultipleDtypes) { + std::vector dtypes = { + DataType::kFLOAT32, + DataType::kBFLOAT16, + }; + + for (const auto& dtype : dtypes) { + auto tensor = std::make_shared(std::vector{2, 3}, dtype, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dtype(), dtype); + } +} + +TEST_F(TensorCreateTest, CreatesTensorOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); +#endif +} + +TEST_F(TensorCreateTest, TracksRequiresGradOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + 
EXPECT_TRUE(tensor->requires_grad()); +#endif +} + +TEST_F(TensorCreateTest, ProvidesDataPointerOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +#endif +} diff --git a/tests/tensor/test_tensor_delete.cc b/tests/tensor/test_tensor_delete.cc new file mode 100644 index 00000000..e959b807 --- /dev/null +++ b/tests/tensor/test_tensor_delete.cc @@ -0,0 +1,104 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorDeleteTest : public infini_train::test::TensorTestBase {}; + +static void FillSequential(const std::shared_ptr& tensor, float start = 0.0f) { + auto* data = static_cast(tensor->DataPtr()); + size_t n = 1; + for (auto dim : tensor->Dims()) { + n *= static_cast(dim); + } + for (size_t i = 0; i < n; ++i) { + data[i] = start + static_cast(i); + } +} + +TEST_F(TensorDeleteTest, ReleasesResourcesOnReset) { + std::weak_ptr weak_tensor; + { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + tensor->set_requires_grad(true); + weak_tensor = tensor; + } + EXPECT_TRUE(weak_tensor.expired()); +} + +TEST_F(TensorDeleteTest, MoveTransferKeepsData) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(tensor, 5.0f); + + auto moved = std::move(tensor); + EXPECT_EQ(tensor, nullptr); + ASSERT_NE(moved, nullptr); + + auto* data = static_cast(moved->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(data[i], 5.0f + static_cast(i)); + } +} + +TEST_F(TensorDeleteTest, NullifiesPointerOnMove) { + auto tensor = std::make_shared(std::vector{3, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + + auto moved_tensor = std::move(tensor); + EXPECT_EQ(tensor, nullptr); + EXPECT_NE(moved_tensor, nullptr); +} + +TEST_F(TensorDeleteTest, SharedPtrRefCountOnCopy) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(tensor, 1.0f); + + auto copy1 = tensor; + auto copy2 = tensor; + + EXPECT_EQ(tensor.use_count(), 3); + EXPECT_EQ(copy1.use_count(), 3); + EXPECT_EQ(copy2.use_count(), 3); + + copy1.reset(); + EXPECT_EQ(tensor.use_count(), 2); + + copy2.reset(); + EXPECT_EQ(tensor.use_count(), 1); + + EXPECT_NE(tensor, nullptr); +} + +TEST_F(TensorDeleteTest, TensorDestroyedAfterScope) { + bool destroyed = false; + { + auto tensor = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + } +} + +TEST_F(TensorDeleteTest, ReleaseMemoryOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + std::weak_ptr weak_tensor; + { + auto tensor = std::make_shared(std::vector{100, 100}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->IsCUDA()); + weak_tensor = tensor; + } + EXPECT_TRUE(weak_tensor.expired()); +#endif +}
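Taken together, a typical local run of the new suite combines a build with tests enabled and CTest label filtering; a sketch of the workflow (the -DBUILD_TEST flag is assumed from the top-level option, and ctest --test-dir needs CMake 3.20+):

```
cmake -S . -B build -DBUILD_TEST=ON
cmake --build build -j        # or: --target test_tensor for just the tensor binaries
ctest --test-dir build -L cpu -LE slow --output-on-failure
```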