From ac79266dbeec90ad50b85d03f746032b2650e17b Mon Sep 17 00:00:00 2001 From: PPPoint <1024879159@qq.com> Date: Fri, 30 Jan 2026 20:00:19 +0800 Subject: [PATCH 1/3] =?UTF-8?q?Finish=20T1-1-11:=20gcd=E3=80=81select=5Fsc?= =?UTF-8?q?atter=E3=80=81nll=5Floss=E3=80=81glu=E3=80=81gt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/infinicore/ops/gcd.hpp | 18 +++ include/infinicore/ops/glu.hpp | 18 +++ include/infinicore/ops/gt.hpp | 18 +++ include/infinicore/ops/nll_loss.hpp | 19 +++ include/infinicore/ops/select_scatter.hpp | 18 +++ python/infinicore/__init__.py | 6 + python/infinicore/nn/functional/__init__.py | 4 + python/infinicore/nn/functional/glu.py | 10 ++ python/infinicore/nn/functional/nll_loss.py | 39 ++++++ python/infinicore/ops/gcd.py | 15 +++ python/infinicore/ops/gt.py | 16 +++ python/infinicore/ops/select_scatter.py | 9 ++ src/infinicore/ops/gcd/gcd.cc | 26 ++++ src/infinicore/ops/gcd/gcd_cpu.cc | 107 ++++++++++++++++ src/infinicore/ops/glu/glu.cc | 31 +++++ src/infinicore/ops/glu/glu_cpu.cc | 108 ++++++++++++++++ src/infinicore/ops/gt/gt.cc | 21 +++ src/infinicore/ops/gt/gt_cpu.cc | 71 +++++++++++ src/infinicore/ops/nll_loss/nll_loss.cc | 26 ++++ src/infinicore/ops/nll_loss/nll_loss_cpu.cc | 120 ++++++++++++++++++ .../ops/select_scatter/select_scatter.cc | 26 ++++ .../ops/select_scatter/select_scatter_cpu.cc | 115 +++++++++++++++++ src/infinicore/pybind11/ops.hpp | 10 ++ src/infinicore/pybind11/ops/gcd.hpp | 21 +++ src/infinicore/pybind11/ops/glu.hpp | 21 +++ src/infinicore/pybind11/ops/gt.hpp | 19 +++ src/infinicore/pybind11/ops/nll_loss.hpp | 29 +++++ .../pybind11/ops/select_scatter.hpp | 18 +++ test/infinicore/ops/gcd.py | 6 +- test/infinicore/ops/glu.py | 6 +- test/infinicore/ops/gt.py | 6 +- test/infinicore/ops/nll_loss.py | 6 +- test/infinicore/ops/select_scatter.py | 6 +- 33 files changed, 974 insertions(+), 15 deletions(-) create mode 100644 include/infinicore/ops/gcd.hpp create mode 100644 
include/infinicore/ops/glu.hpp create mode 100644 include/infinicore/ops/gt.hpp create mode 100644 include/infinicore/ops/nll_loss.hpp create mode 100644 include/infinicore/ops/select_scatter.hpp create mode 100644 python/infinicore/nn/functional/glu.py create mode 100644 python/infinicore/nn/functional/nll_loss.py create mode 100644 python/infinicore/ops/gcd.py create mode 100644 python/infinicore/ops/gt.py create mode 100644 python/infinicore/ops/select_scatter.py create mode 100644 src/infinicore/ops/gcd/gcd.cc create mode 100644 src/infinicore/ops/gcd/gcd_cpu.cc create mode 100644 src/infinicore/ops/glu/glu.cc create mode 100644 src/infinicore/ops/glu/glu_cpu.cc create mode 100644 src/infinicore/ops/gt/gt.cc create mode 100644 src/infinicore/ops/gt/gt_cpu.cc create mode 100644 src/infinicore/ops/nll_loss/nll_loss.cc create mode 100644 src/infinicore/ops/nll_loss/nll_loss_cpu.cc create mode 100644 src/infinicore/ops/select_scatter/select_scatter.cc create mode 100644 src/infinicore/ops/select_scatter/select_scatter_cpu.cc create mode 100644 src/infinicore/pybind11/ops/gcd.hpp create mode 100644 src/infinicore/pybind11/ops/glu.hpp create mode 100644 src/infinicore/pybind11/ops/gt.hpp create mode 100644 src/infinicore/pybind11/ops/nll_loss.hpp create mode 100644 src/infinicore/pybind11/ops/select_scatter.hpp diff --git a/include/infinicore/ops/gcd.hpp b/include/infinicore/ops/gcd.hpp new file mode 100644 index 000000000..21608a0ee --- /dev/null +++ b/include/infinicore/ops/gcd.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include "../device.hpp" +#include "common/op.hpp" + +namespace infinicore::op { + +class Gcd { +public: + using schema = void (*)(Tensor, Tensor, Tensor); + static void execute(Tensor input, Tensor other, Tensor output); + static common::OpDispatcher &dispatcher(); +}; + +Tensor gcd(Tensor input, Tensor other); +void gcd_(Tensor input, Tensor other, Tensor output); + +} // namespace infinicore::op \ No newline at end of file diff --git 
a/include/infinicore/ops/glu.hpp b/include/infinicore/ops/glu.hpp new file mode 100644 index 000000000..167b0beb2 --- /dev/null +++ b/include/infinicore/ops/glu.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include "../device.hpp" +#include "common/op.hpp" + +namespace infinicore::op { + +class Glu { +public: + using schema = void (*)(Tensor, Tensor, int); + static void execute(Tensor input, Tensor output, int dim); + static common::OpDispatcher &dispatcher(); +}; + +Tensor glu(Tensor input, int dim); +void glu_(Tensor input, Tensor output, int dim); + +} // namespace infinicore::op \ No newline at end of file diff --git a/include/infinicore/ops/gt.hpp b/include/infinicore/ops/gt.hpp new file mode 100644 index 000000000..a04692eeb --- /dev/null +++ b/include/infinicore/ops/gt.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include "../device.hpp" +#include "common/op.hpp" + +namespace infinicore::op { + +class Gt { +public: + using schema = void (*)(Tensor, Tensor, Tensor); + static void execute(Tensor input, Tensor other, Tensor output); + static common::OpDispatcher &dispatcher(); +}; + +Tensor gt(Tensor input, Tensor other); +void gt_(Tensor input, Tensor other, Tensor output); + +} // namespace infinicore::op \ No newline at end of file diff --git a/include/infinicore/ops/nll_loss.hpp b/include/infinicore/ops/nll_loss.hpp new file mode 100644 index 000000000..8feba68cb --- /dev/null +++ b/include/infinicore/ops/nll_loss.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include "../device.hpp" +#include "common/op.hpp" +#include + +namespace infinicore::op { + +class NLLLoss { +public: + using schema = void (*)(Tensor, Tensor, std::optional, Tensor, int64_t); + static void execute(Tensor input, Tensor target, std::optional weight, Tensor output, int64_t ignore_index); + static common::OpDispatcher &dispatcher(); +}; + +Tensor nll_loss(Tensor input, Tensor target, std::optional weight, int64_t ignore_index); +void nll_loss_(Tensor input, Tensor target, std::optional weight, Tensor output, 
int64_t ignore_index); + +} // namespace infinicore::op \ No newline at end of file diff --git a/include/infinicore/ops/select_scatter.hpp b/include/infinicore/ops/select_scatter.hpp new file mode 100644 index 000000000..2e1348f14 --- /dev/null +++ b/include/infinicore/ops/select_scatter.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include "../device.hpp" +#include "common/op.hpp" + +namespace infinicore::op { + +class SelectScatter { +public: + using schema = void (*)(Tensor, Tensor, int64_t, int64_t, Tensor); + static void execute(Tensor input, Tensor src, int64_t dim, int64_t index, Tensor output); + static common::OpDispatcher &dispatcher(); +}; + +Tensor select_scatter(Tensor input, Tensor src, int64_t dim, int64_t index); +void select_scatter_(Tensor input, Tensor src, int64_t dim, int64_t index, Tensor output); + +} // namespace infinicore::op \ No newline at end of file diff --git a/python/infinicore/__init__.py b/python/infinicore/__init__.py index c6b01d5aa..16b64ded3 100644 --- a/python/infinicore/__init__.py +++ b/python/infinicore/__init__.py @@ -54,6 +54,9 @@ from infinicore.ops.rearrange import rearrange from infinicore.ops.squeeze import squeeze from infinicore.ops.unsqueeze import unsqueeze +from infinicore.ops.gcd import gcd +from infinicore.ops.gt import gt +from infinicore.ops.select_scatter import select_scatter from infinicore.tensor import ( Tensor, empty, @@ -134,6 +137,9 @@ "strided_empty", "strided_from_blob", "zeros", + "gcd", + "select_scatter", + "gt", ] use_ntops = False diff --git a/python/infinicore/nn/functional/__init__.py b/python/infinicore/nn/functional/__init__.py index 255079790..18908ad1b 100644 --- a/python/infinicore/nn/functional/__init__.py +++ b/python/infinicore/nn/functional/__init__.py @@ -6,6 +6,8 @@ from .rope import RopeAlgo, rope from .silu import silu from .swiglu import swiglu +from .nll_loss import nll_loss +from .glu import glu __all__ = [ "causal_softmax", @@ -17,4 +19,6 @@ "embedding", "rope", "RopeAlgo", + 
"nll_loss", + "glu", ] diff --git a/python/infinicore/nn/functional/glu.py b/python/infinicore/nn/functional/glu.py new file mode 100644 index 000000000..e9126ad35 --- /dev/null +++ b/python/infinicore/nn/functional/glu.py @@ -0,0 +1,10 @@ +import infinicore +from infinicore.lib import _infinicore +from infinicore.tensor import Tensor + +def glu(input: Tensor, dim: int = -1) -> Tensor: + + if infinicore.use_ntops and input.device.type in ("cuda", "musa"): + return infinicore.ntops.torch.glu(input, dim) + + return Tensor(_infinicore.glu(input._underlying, dim)) \ No newline at end of file diff --git a/python/infinicore/nn/functional/nll_loss.py b/python/infinicore/nn/functional/nll_loss.py new file mode 100644 index 000000000..4c731472b --- /dev/null +++ b/python/infinicore/nn/functional/nll_loss.py @@ -0,0 +1,39 @@ +import infinicore +from infinicore.lib import _infinicore +from infinicore.tensor import Tensor + +def nll_loss( + input: Tensor, + target: Tensor, + weight: Tensor | None = None, + ignore_index: int = -100, + reduction: str = "mean", + *, + out=None, +) -> Tensor: + + if infinicore.use_ntops and input.device.type in ("cuda", "musa"): + return infinicore.ntops.torch.nll_loss( + input, target, weight=weight, ignore_index=ignore_index, reduction=reduction + ) + + weight_underlying = weight._underlying if weight is not None else None + + if out is None: + return Tensor( + _infinicore.nll_loss( + input._underlying, + target._underlying, + weight_underlying, + ignore_index + ) + ) + + _infinicore.nll_loss_( + input._underlying, + target._underlying, + weight_underlying, + out._underlying, + ignore_index + ) + return out \ No newline at end of file diff --git a/python/infinicore/ops/gcd.py b/python/infinicore/ops/gcd.py new file mode 100644 index 000000000..defc96a52 --- /dev/null +++ b/python/infinicore/ops/gcd.py @@ -0,0 +1,15 @@ +import infinicore +from infinicore.lib import _infinicore +from infinicore.tensor import Tensor + +def gcd(input: Tensor, other: 
Tensor, *, out=None) -> Tensor: + r"""Computes the element-wise greatest common divisor (GCD).""" + + if infinicore.use_ntops and input.device.type in ("cuda", "musa"): + return infinicore.ntops.torch.gcd(input, other, out=out) + + if out is None: + return Tensor(_infinicore.gcd(input._underlying, other._underlying)) + + _infinicore.gcd_(input._underlying, other._underlying, out._underlying) + return out \ No newline at end of file diff --git a/python/infinicore/ops/gt.py b/python/infinicore/ops/gt.py new file mode 100644 index 000000000..f347df279 --- /dev/null +++ b/python/infinicore/ops/gt.py @@ -0,0 +1,16 @@ +import infinicore +from infinicore.lib import _infinicore +from infinicore.tensor import Tensor + +def gt(input: Tensor, other: Tensor | float, *, out: Tensor | None = None) -> Tensor: + if infinicore.use_ntops and input.device.type in ("cuda", "musa"): + return infinicore.ntops.torch.gt(input, other, out=out) + + if isinstance(other, (int, float)): + other = Tensor.full(input.shape, other, dtype=input.dtype, device=input.device) + + if out is None: + return Tensor(_infinicore.gt(input._underlying, other._underlying)) + + _infinicore.gt_(input._underlying, other._underlying, out._underlying) + return out \ No newline at end of file diff --git a/python/infinicore/ops/select_scatter.py b/python/infinicore/ops/select_scatter.py new file mode 100644 index 000000000..31af019df --- /dev/null +++ b/python/infinicore/ops/select_scatter.py @@ -0,0 +1,9 @@ +import infinicore +from infinicore.lib import _infinicore +from infinicore.tensor import Tensor + +def select_scatter(input: Tensor, src: Tensor, dim: int, index: int) -> Tensor: + if infinicore.use_ntops and input.device.type in ("cuda", "musa"): + return infinicore.ntops.torch.select_scatter(input, src, dim, index) + + return Tensor(_infinicore.select_scatter(input._underlying, src._underlying, dim, index)) \ No newline at end of file diff --git a/src/infinicore/ops/gcd/gcd.cc b/src/infinicore/ops/gcd/gcd.cc 
new file mode 100644 index 000000000..b2e152d2b --- /dev/null +++ b/src/infinicore/ops/gcd/gcd.cc @@ -0,0 +1,26 @@ +#include "infinicore/ops/gcd.hpp" +#include "../../utils.hpp" + +namespace infinicore::op { + +common::OpDispatcher &Gcd::dispatcher() { + static common::OpDispatcher dispatcher_; + return dispatcher_; +}; + +void Gcd::execute(Tensor input, Tensor other, Tensor output) { + infinicore::context::setDevice(input->device()); + dispatcher().lookup(input->device().getType())(input, other, output); +} + +Tensor gcd(Tensor input, Tensor other) { + auto output = Tensor::empty(input->shape(), input->dtype(), input->device()); + gcd_(input, other, output); + return output; +} + +void gcd_(Tensor input, Tensor other, Tensor output) { + Gcd::execute(input, other, output); +} + +} // namespace infinicore::op \ No newline at end of file diff --git a/src/infinicore/ops/gcd/gcd_cpu.cc b/src/infinicore/ops/gcd/gcd_cpu.cc new file mode 100644 index 000000000..ed39adf32 --- /dev/null +++ b/src/infinicore/ops/gcd/gcd_cpu.cc @@ -0,0 +1,107 @@ +#include "../../../utils.h" +#include "infinicore/device.hpp" +#include "infinicore/ops/gcd.hpp" +#include +#include +#include +#include + +namespace infinicore::op::gcd_impl::cpu { + +template +T compute_gcd(T a, T b) { + return std::gcd(std::abs(a), std::abs(b)); +} + +template +void gcd_contiguous(const T *input_ptr, const T *other_ptr, T *output_ptr, size_t numel) { +#pragma omp parallel for + for (size_t i = 0; i < numel; ++i) { + auto a = utils::cast(input_ptr[i]); + auto b = utils::cast(other_ptr[i]); + output_ptr[i] = utils::cast(compute_gcd(a, b)); + } +} + +template +void gcd_strided(const T *input_base, const T *other_base, T *output_base, + const std::vector &shape, + const std::vector &input_strides, + const std::vector &other_strides, + const std::vector &output_strides) { + + size_t numel = 1; + for (auto s : shape) { + numel *= s; + } + int ndim = shape.size(); + +#pragma omp parallel for + for (size_t i = 0; i < 
numel; ++i) { + size_t temp_idx = i; + size_t input_offset = 0; + size_t other_offset = 0; + size_t output_offset = 0; + + for (int d = ndim - 1; d >= 0; --d) { + size_t coord = temp_idx % shape[d]; + temp_idx /= shape[d]; + + input_offset += coord * input_strides[d]; + other_offset += coord * other_strides[d]; + output_offset += coord * output_strides[d]; + } + + auto a = utils::cast(input_base[input_offset]); + auto b = utils::cast(other_base[other_offset]); + + output_base[output_offset] = utils::cast(compute_gcd(a, b)); + } +} + +void calculate(Tensor input, Tensor other, Tensor output) { + if (input->shape() != other->shape() || input->shape() != output->shape()) { + throw std::runtime_error("GCD CPU implementation requires all tensors to have the same shape."); + } + + bool all_contiguous = input->is_contiguous() && other->is_contiguous() && output->is_contiguous(); + auto dtype = input->dtype(); + size_t numel = input->numel(); + + if (dtype == DataType::I64) { + if (all_contiguous) { + gcd_contiguous( + reinterpret_cast(input->data()), + reinterpret_cast(other->data()), + reinterpret_cast(output->data()), numel); + } else { + gcd_strided( + reinterpret_cast(input->data()), + reinterpret_cast(other->data()), + reinterpret_cast(output->data()), + input->shape(), input->strides(), other->strides(), output->strides()); + } + } else if (dtype == DataType::I32) { + if (all_contiguous) { + gcd_contiguous( + reinterpret_cast(input->data()), + reinterpret_cast(other->data()), + reinterpret_cast(output->data()), numel); + } else { + gcd_strided( + reinterpret_cast(input->data()), + reinterpret_cast(other->data()), + reinterpret_cast(output->data()), + input->shape(), input->strides(), other->strides(), output->strides()); + } + } else { + throw std::runtime_error("GCD only supports I32 and I64 on CPU."); + } +} + +static bool registered = []() { + Gcd::dispatcher().registerDevice(Device::Type::CPU, &calculate); + return true; +}(); + +} // namespace 
infinicore::op::gcd_impl::cpu \ No newline at end of file diff --git a/src/infinicore/ops/glu/glu.cc b/src/infinicore/ops/glu/glu.cc new file mode 100644 index 000000000..e0d6e1179 --- /dev/null +++ b/src/infinicore/ops/glu/glu.cc @@ -0,0 +1,31 @@ +#include "infinicore/ops/glu.hpp" +#include "../../utils.hpp" + +namespace infinicore::op { + +common::OpDispatcher &Glu::dispatcher() { + static common::OpDispatcher dispatcher_; + return dispatcher_; +} + +void Glu::execute(Tensor input, Tensor output, int dim) { + infinicore::context::setDevice(input->device()); + dispatcher().lookup(input->device().getType())(input, output, dim); +} + +Tensor glu(Tensor input, int dim) { + if (dim < 0) { + dim += input->ndim(); + } + auto out_shape = input->shape(); + out_shape[dim] /= 2; + auto output = Tensor::empty(out_shape, input->dtype(), input->device()); + glu_(input, output, dim); + return output; +} + +void glu_(Tensor input, Tensor output, int dim) { + Glu::execute(input, output, dim); +} + +} // namespace infinicore::op \ No newline at end of file diff --git a/src/infinicore/ops/glu/glu_cpu.cc b/src/infinicore/ops/glu/glu_cpu.cc new file mode 100644 index 000000000..efa136d33 --- /dev/null +++ b/src/infinicore/ops/glu/glu_cpu.cc @@ -0,0 +1,108 @@ +#include "../../../utils.h" +#include "infinicore/device.hpp" +#include "infinicore/ops/glu.hpp" +#include +#include + +namespace infinicore::op::glu_impl::cpu { + +template +inline T sigmoid(T x) { + float val = utils::cast(x); + float res = 1.0f / (1.0f + std::exp(-val)); + return utils::cast(res); +} + +template +void calculate_glu_cpu(Tensor input, Tensor output, int dim) { + auto in_shape = input->shape(); + auto in_strides = input->strides(); + auto out_shape = output->shape(); + auto out_strides = output->strides(); + int ndim = input->ndim(); + + if (dim < 0) { + dim += ndim; + } + + size_t M = 1; + for (int i = 0; i < dim; ++i) { + M *= in_shape[i]; + } + size_t split_size = out_shape[dim]; + size_t N = 1; + for (int i 
= dim + 1; i < ndim; ++i) { + N *= in_shape[i]; + } + + const T *in_ptr = reinterpret_cast(input->data()); + T *out_ptr = reinterpret_cast(output->data()); + +#pragma omp parallel for collapse(2) + for (size_t i = 0; i < M; ++i) { + for (size_t j = 0; j < N; ++j) { + for (size_t k = 0; k < split_size; ++k) { + + size_t offset_base = i * in_strides[dim] * (split_size * 2) + j * in_strides[ndim - 1]; + auto get_offset = [&](size_t d_idx, size_t k_val) { + size_t off = 0; + size_t remaining = i; + for (int d = dim - 1; d >= 0; --d) { + off += (remaining % in_shape[d]) * in_strides[d]; + remaining /= in_shape[d]; + } + off += (k_val)*in_strides[dim]; + remaining = j; + for (int d = ndim - 1; d > dim; --d) { + off += (remaining % in_shape[d]) * in_strides[d]; + remaining /= in_shape[d]; + } + return off; + }; + + size_t off_a = get_offset(dim, k); + size_t off_b = get_offset(dim, k + split_size); + + size_t off_out = 0; + size_t rem = i; + for (int d = dim - 1; d >= 0; --d) { + off_out += (rem % out_shape[d]) * out_strides[d]; + rem /= out_shape[d]; + } + off_out += k * out_strides[dim]; + rem = j; + for (int d = ndim - 1; d > dim; --d) { + off_out += (rem % out_shape[d]) * out_strides[d]; + rem /= out_shape[d]; + } + + T a = in_ptr[off_a]; + T b = in_ptr[off_b]; + + float val_a = utils::cast(a); + float sig_b = utils::cast(sigmoid(b)); + out_ptr[off_out] = utils::cast(val_a * sig_b); + } + } + } +} + +void calculate(Tensor input, Tensor output, int dim) { + auto dtype = input->dtype(); + if (dtype == DataType::F32) { + calculate_glu_cpu(input, output, dim); + } else if (dtype == DataType::F16) { + calculate_glu_cpu(input, output, dim); + } else if (dtype == DataType::BF16) { + calculate_glu_cpu(input, output, dim); + } else { + throw std::runtime_error("GLU unsupported dtype: " + toString(dtype)); + } +} + +static bool registered = []() { + Glu::dispatcher().registerDevice(Device::Type::CPU, &calculate); + return true; +}(); + +} // namespace 
infinicore::op::glu_impl::cpu \ No newline at end of file diff --git a/src/infinicore/ops/gt/gt.cc b/src/infinicore/ops/gt/gt.cc new file mode 100644 index 000000000..feec12ea1 --- /dev/null +++ b/src/infinicore/ops/gt/gt.cc @@ -0,0 +1,21 @@ +#include "infinicore/ops/gt.hpp" +#include "../../utils.hpp" + +namespace infinicore::op { +common::OpDispatcher &Gt::dispatcher() { + static common::OpDispatcher dispatcher_; + return dispatcher_; +} +void Gt::execute(Tensor input, Tensor other, Tensor output) { + infinicore::context::setDevice(input->device()); + dispatcher().lookup(input->device().getType())(input, other, output); +} +Tensor gt(Tensor input, Tensor other) { + auto output = Tensor::empty(input->shape(), DataType::BOOL, input->device()); + gt_(input, other, output); + return output; +} +void gt_(Tensor input, Tensor other, Tensor output) { + Gt::execute(input, other, output); +} +} // namespace infinicore::op \ No newline at end of file diff --git a/src/infinicore/ops/gt/gt_cpu.cc b/src/infinicore/ops/gt/gt_cpu.cc new file mode 100644 index 000000000..ffecf9476 --- /dev/null +++ b/src/infinicore/ops/gt/gt_cpu.cc @@ -0,0 +1,71 @@ +#include "../../../utils.h" +#include "infinicore/device.hpp" +#include "infinicore/ops/gt.hpp" +#include + +namespace infinicore::op::gt_impl::cpu { + +template +void calculate_gt_cpu(Tensor input, Tensor other, Tensor output) { + auto in_ptr = reinterpret_cast(input->data()); + auto other_ptr = reinterpret_cast(other->data()); + auto out_base = output->data(); + + auto shape = input->shape(); + auto in_strides = input->strides(); + auto other_strides = other->strides(); + auto out_strides = output->strides(); + auto out_dtype = output->dtype(); + int ndim = input->ndim(); + size_t numel = input->numel(); + +#pragma omp parallel for + for (size_t i = 0; i < numel; ++i) { + size_t temp_idx = i; + size_t in_off = 0; + size_t other_off = 0; + size_t out_off = 0; + + for (int d = ndim - 1; d >= 0; --d) { + size_t coord = temp_idx % 
shape[d]; + temp_idx /= shape[d]; + in_off += coord * in_strides[d]; + other_off += coord * other_strides[d]; + out_off += coord * out_strides[d]; + } + + bool result = utils::cast(in_ptr[in_off]) > utils::cast(other_ptr[other_off]); + + if (out_dtype == DataType::BOOL) { + *(reinterpret_cast(out_base + out_off)) = result; + } else if (out_dtype == DataType::F32) { + *(reinterpret_cast(out_base + out_off * sizeof(float))) = result ? 1.0f : 0.0f; + } else if (out_dtype == DataType::F16) { + *(reinterpret_cast(out_base + out_off * sizeof(fp16_t))) = utils::cast(result ? 1.0f : 0.0f); + } else if (out_dtype == DataType::I32) { + *(reinterpret_cast(out_base + out_off * sizeof(int32_t))) = result ? 1 : 0; + } + } +} + +void calculate(Tensor input, Tensor other, Tensor output) { + auto dtype = input->dtype(); + if (dtype == DataType::F32) { + calculate_gt_cpu(input, other, output); + } else if (dtype == DataType::F16) { + calculate_gt_cpu(input, other, output); + } else if (dtype == DataType::BF16) { + calculate_gt_cpu(input, other, output); + } else if (dtype == DataType::I32) { + calculate_gt_cpu(input, other, output); + } else { + throw std::runtime_error("GT unsupported dtype"); + } +} + +static bool registered = []() { + Gt::dispatcher().registerDevice(Device::Type::CPU, &calculate); + return true; +}(); + +} // namespace infinicore::op::gt_impl::cpu \ No newline at end of file diff --git a/src/infinicore/ops/nll_loss/nll_loss.cc b/src/infinicore/ops/nll_loss/nll_loss.cc new file mode 100644 index 000000000..35c413b8c --- /dev/null +++ b/src/infinicore/ops/nll_loss/nll_loss.cc @@ -0,0 +1,26 @@ +#include "infinicore/ops/nll_loss.hpp" +#include "../../utils.hpp" + +namespace infinicore::op { + +common::OpDispatcher &NLLLoss::dispatcher() { + static common::OpDispatcher dispatcher_; + return dispatcher_; +}; + +void NLLLoss::execute(Tensor input, Tensor target, std::optional weight, Tensor output, int64_t ignore_index) { + 
infinicore::context::setDevice(input->device()); + dispatcher().lookup(input->device().getType())(input, target, weight, output, ignore_index); +} + +Tensor nll_loss(Tensor input, Tensor target, std::optional weight, int64_t ignore_index) { + auto output = Tensor::empty({}, input->dtype(), input->device()); + nll_loss_(input, target, weight, output, ignore_index); + return output; +} + +void nll_loss_(Tensor input, Tensor target, std::optional weight, Tensor output, int64_t ignore_index) { + NLLLoss::execute(input, target, weight, output, ignore_index); +} + +} // namespace infinicore::op \ No newline at end of file diff --git a/src/infinicore/ops/nll_loss/nll_loss_cpu.cc b/src/infinicore/ops/nll_loss/nll_loss_cpu.cc new file mode 100644 index 000000000..23d81d0a3 --- /dev/null +++ b/src/infinicore/ops/nll_loss/nll_loss_cpu.cc @@ -0,0 +1,120 @@ +#include "../../../utils.h" +#include "infinicore/device.hpp" +#include "infinicore/ops/nll_loss.hpp" +#include +#include +#include +#include +#include + +namespace infinicore::op::nll_loss_impl::cpu { + +inline float bf16_to_f32(uint16_t val) { + uint32_t bits = static_cast(val) << 16; + float f; + std::memcpy(&f, &bits, sizeof(f)); + return f; +} + +template +void nll_loss_kernel(const Tensor &input, const Tensor &target, std::optional weight, Tensor &output, int64_t ignore_index) { + + const void *input_raw = input->data(); + const TargetT *target_data = reinterpret_cast(target->data()); + T *output_data = reinterpret_cast(output->data()); + const void *weight_raw = nullptr; + + if (weight.has_value() && weight.value()) { + weight_raw = weight.value()->data(); + } + + auto input_strides = input->strides(); + size_t batch_size = input->shape()[0]; + size_t n_classes = input->shape()[1]; + + int64_t input_stride_n = input_strides[0]; + int64_t input_stride_c = input_strides[1]; + int64_t target_stride = target->strides()[0]; + int64_t weight_stride = (weight.has_value() && weight.value()) ? 
weight.value()->strides()[0] : 0; + + auto dtype = input->dtype(); + double total_loss = 0.0; + double total_weight = 0.0; + +#pragma omp parallel for reduction(+ : total_loss, total_weight) + for (size_t i = 0; i < batch_size; ++i) { + TargetT t = target_data[i * target_stride]; + + if (t == ignore_index) { + continue; + } + + if (t < 0 || t >= static_cast(n_classes)) { + continue; + } + + double w_val = 1.0; + if (weight_raw) { + if (dtype == DataType::BF16) { + const uint16_t *w_ptr = reinterpret_cast(weight_raw); + w_val = static_cast(bf16_to_f32(w_ptr[t * weight_stride])); + } else { + const T *w_ptr = reinterpret_cast(weight_raw); + w_val = utils::cast(w_ptr[t * weight_stride]); + } + } + + size_t offset = i * input_stride_n + t * input_stride_c; + double logit_val = 0.0; + + if (dtype == DataType::BF16) { + const uint16_t *in_ptr = reinterpret_cast(input_raw); + logit_val = static_cast(bf16_to_f32(in_ptr[offset])); + } else { + const T *in_ptr = reinterpret_cast(input_raw); + logit_val = utils::cast(in_ptr[offset]); + } + + total_loss += (-logit_val * w_val); + total_weight += w_val; + } + + if (total_weight > 0) { + float res_f = static_cast(total_loss / total_weight); + if (dtype == DataType::BF16) { + uint32_t bits; + std::memcpy(&bits, &res_f, sizeof(bits)); + uint16_t bf16_val = static_cast(bits >> 16); + *reinterpret_cast(output_data) = bf16_val; + } else { + *output_data = utils::cast(res_f); + } + } else { + if (dtype == DataType::BF16) { + *reinterpret_cast(output_data) = 0; + } else { + *output_data = utils::cast(0.0f); + } + } +} + +void calculate(Tensor input, Tensor target, std::optional weight, Tensor output, int64_t ignore_index) { + auto dtype = input->dtype(); + + if (dtype == DataType::F32) { + nll_loss_kernel(input, target, weight, output, ignore_index); + } else if (dtype == DataType::F16) { + nll_loss_kernel(input, target, weight, output, ignore_index); + } else if (dtype == DataType::BF16) { + nll_loss_kernel(input, target, weight, 
output, ignore_index); + } else { + throw std::runtime_error("Unsupported dtype for nll_loss"); + } +} + +static bool registered = []() { + NLLLoss::dispatcher().registerDevice(Device::Type::CPU, &calculate); + return true; +}(); + +} // namespace infinicore::op::nll_loss_impl::cpu \ No newline at end of file diff --git a/src/infinicore/ops/select_scatter/select_scatter.cc b/src/infinicore/ops/select_scatter/select_scatter.cc new file mode 100644 index 000000000..02feb666f --- /dev/null +++ b/src/infinicore/ops/select_scatter/select_scatter.cc @@ -0,0 +1,26 @@ +#include "infinicore/ops/select_scatter.hpp" +#include "../../utils.hpp" + +namespace infinicore::op { + +common::OpDispatcher &SelectScatter::dispatcher() { + static common::OpDispatcher dispatcher_; + return dispatcher_; +}; + +void SelectScatter::execute(Tensor input, Tensor src, int64_t dim, int64_t index, Tensor output) { + infinicore::context::setDevice(input->device()); + dispatcher().lookup(input->device().getType())(input, src, dim, index, output); +} + +Tensor select_scatter(Tensor input, Tensor src, int64_t dim, int64_t index) { + auto output = Tensor::empty(input->shape(), input->dtype(), input->device()); + select_scatter_(input, src, dim, index, output); + return output; +} + +void select_scatter_(Tensor input, Tensor src, int64_t dim, int64_t index, Tensor output) { + SelectScatter::execute(input, src, dim, index, output); +} + +} // namespace infinicore::op \ No newline at end of file diff --git a/src/infinicore/ops/select_scatter/select_scatter_cpu.cc b/src/infinicore/ops/select_scatter/select_scatter_cpu.cc new file mode 100644 index 000000000..97b8196e4 --- /dev/null +++ b/src/infinicore/ops/select_scatter/select_scatter_cpu.cc @@ -0,0 +1,115 @@ +#include "../../../utils.h" +#include "infinicore/device.hpp" +#include "infinicore/ops/select_scatter.hpp" +#include +#include +#include + +namespace infinicore::op::select_scatter_impl::cpu { + +template +void copy_kernel(T *dst_ptr, const 
std::vector &dst_shape, const std::vector &dst_strides, + const T *src_ptr, const std::vector &src_shape, const std::vector &src_strides) { + + size_t numel = 1; + for (auto s : dst_shape) { + numel *= s; + } + int ndim = dst_shape.size(); + + std::vector effective_src_strides = src_strides; + for (int i = 0; i < ndim; ++i) { + if (src_shape[i] == 1 && dst_shape[i] > 1) { + effective_src_strides[i] = 0; + } + } + +#pragma omp parallel for + for (size_t i = 0; i < numel; ++i) { + size_t temp_idx = i; + size_t dst_offset = 0; + size_t src_offset = 0; + + for (int d = ndim - 1; d >= 0; --d) { + size_t coord = temp_idx % dst_shape[d]; + temp_idx /= dst_shape[d]; + + dst_offset += coord * dst_strides[d]; + src_offset += coord * effective_src_strides[d]; + } + + dst_ptr[dst_offset] = utils::cast(src_ptr[src_offset]); + } +} + +void calculate(Tensor input, Tensor src, int64_t dim, int64_t index, Tensor output) { + auto ndim = input->ndim(); + if (dim < 0) { + dim += ndim; + } + if (index < 0) { + index += input->shape()[dim]; + } + + size_t total_numel = input->numel(); + auto dtype = input->dtype(); + + if (input->is_contiguous() && output->is_contiguous() && input->dtype() == output->dtype()) { + memcpy(output->data(), input->data(), total_numel * input->element_size()); + } else { + + if (dtype == DataType::F32) { + copy_kernel( + reinterpret_cast(output->data()), output->shape(), output->strides(), + reinterpret_cast(input->data()), input->shape(), input->strides()); + } else if (dtype == DataType::F16) { + copy_kernel( + reinterpret_cast(output->data()), output->shape(), output->strides(), + reinterpret_cast(input->data()), input->shape(), input->strides()); + } else if (dtype == DataType::BF16) { + copy_kernel( + reinterpret_cast(output->data()), output->shape(), output->strides(), + reinterpret_cast(input->data()), input->shape(), input->strides()); + } + } + + std::vector slice_shape = input->shape(); + slice_shape[dim] = 1; + + std::vector slice_strides = 
output->strides(); + + size_t slice_offset_bytes = index * slice_strides[dim] * output->element_size(); + + void *slice_data_ptr = reinterpret_cast(output->data()) + slice_offset_bytes; + + std::vector virtual_src_shape = src->shape(); + std::vector virtual_src_strides = src->strides(); + + if (virtual_src_shape.size() == ndim - 1) { + virtual_src_shape.insert(virtual_src_shape.begin() + dim, 1); + virtual_src_strides.insert(virtual_src_strides.begin() + dim, 0); + } + + if (dtype == DataType::F32) { + copy_kernel( + reinterpret_cast(slice_data_ptr), slice_shape, slice_strides, + reinterpret_cast(src->data()), virtual_src_shape, virtual_src_strides); + } else if (dtype == DataType::F16) { + copy_kernel( + reinterpret_cast(slice_data_ptr), slice_shape, slice_strides, + reinterpret_cast(src->data()), virtual_src_shape, virtual_src_strides); + } else if (dtype == DataType::BF16) { + copy_kernel( + reinterpret_cast(slice_data_ptr), slice_shape, slice_strides, + reinterpret_cast(src->data()), virtual_src_shape, virtual_src_strides); + } else { + throw std::runtime_error("Unsupported dtype for select_scatter"); + } +} + +static bool registered = []() { + SelectScatter::dispatcher().registerDevice(Device::Type::CPU, &calculate); + return true; +}(); + +} // namespace infinicore::op::select_scatter_impl::cpu \ No newline at end of file diff --git a/src/infinicore/pybind11/ops.hpp b/src/infinicore/pybind11/ops.hpp index 3d6ebe79a..5cdd05637 100644 --- a/src/infinicore/pybind11/ops.hpp +++ b/src/infinicore/pybind11/ops.hpp @@ -7,9 +7,13 @@ #include "ops/attention.hpp" #include "ops/causal_softmax.hpp" #include "ops/embedding.hpp" +#include "ops/gcd.hpp" +#include "ops/glu.hpp" +#include "ops/gt.hpp" #include "ops/linear.hpp" #include "ops/matmul.hpp" #include "ops/mul.hpp" +#include "ops/nll_loss.hpp" #include "ops/paged_attention.hpp" #include "ops/paged_attention_prefill.hpp" #include "ops/paged_caching.hpp" @@ -17,6 +21,7 @@ #include "ops/rearrange.hpp" #include 
"ops/rms_norm.hpp" #include "ops/rope.hpp" +#include "ops/select_scatter.hpp" #include "ops/silu.hpp" #include "ops/swiglu.hpp" @@ -42,6 +47,11 @@ inline void bind(py::module &m) { bind_swiglu(m); bind_rope(m); bind_embedding(m); + bind_gcd(m); + bind_select_scatter(m); + bind_nll_loss(m); + bind_glu(m); + bind_gt(m); } } // namespace infinicore::ops diff --git a/src/infinicore/pybind11/ops/gcd.hpp b/src/infinicore/pybind11/ops/gcd.hpp new file mode 100644 index 000000000..16bcfcaa1 --- /dev/null +++ b/src/infinicore/pybind11/ops/gcd.hpp @@ -0,0 +1,21 @@ +#pragma once +#include "infinicore/ops/gcd.hpp" +#include + +namespace py = pybind11; +namespace infinicore::ops { + +inline void bind_gcd(py::module &m) { + m.def("gcd", + &op::gcd, + py::arg("input"), + py::arg("other")); + + m.def("gcd_", + &op::gcd_, + py::arg("input"), + py::arg("other"), + py::arg("output")); +} + +} // namespace infinicore::ops \ No newline at end of file diff --git a/src/infinicore/pybind11/ops/glu.hpp b/src/infinicore/pybind11/ops/glu.hpp new file mode 100644 index 000000000..569ad7d6d --- /dev/null +++ b/src/infinicore/pybind11/ops/glu.hpp @@ -0,0 +1,21 @@ +#pragma once +#include "infinicore/ops/glu.hpp" +#include + +namespace py = pybind11; +namespace infinicore::ops { + +inline void bind_glu(py::module &m) { + m.def("glu", + &op::glu, + py::arg("input"), + py::arg("dim") = -1); + + m.def("glu_", + &op::glu_, + py::arg("input"), + py::arg("output"), + py::arg("dim") = -1); +} + +} // namespace infinicore::ops \ No newline at end of file diff --git a/src/infinicore/pybind11/ops/gt.hpp b/src/infinicore/pybind11/ops/gt.hpp new file mode 100644 index 000000000..44c375c2f --- /dev/null +++ b/src/infinicore/pybind11/ops/gt.hpp @@ -0,0 +1,19 @@ +#pragma once +#include "infinicore/ops/gt.hpp" +#include + +namespace py = pybind11; +namespace infinicore::ops { +inline void bind_gt(py::module &m) { + m.def("gt", + &op::gt, + py::arg("input"), + py::arg("other")); + + m.def("gt_", + &op::gt_, + 
py::arg("input"), + py::arg("other"), + py::arg("output")); +} +} // namespace infinicore::ops \ No newline at end of file diff --git a/src/infinicore/pybind11/ops/nll_loss.hpp b/src/infinicore/pybind11/ops/nll_loss.hpp new file mode 100644 index 000000000..f31827dfe --- /dev/null +++ b/src/infinicore/pybind11/ops/nll_loss.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include "infinicore/ops/nll_loss.hpp" +#include +#include + +namespace py = pybind11; +namespace infinicore::ops { + +inline void bind_nll_loss(py::module &m) { + m.def("nll_loss", + &op::nll_loss, + py::arg("input"), + py::arg("target"), + py::arg("weight") = py::none(), + py::arg("ignore_index") = -100, + R"doc(Calculates NLL Loss.)doc"); + + m.def("nll_loss_", + &op::nll_loss_, + py::arg("input"), + py::arg("target"), + py::arg("weight"), + py::arg("output"), + py::arg("ignore_index") = -100, + R"doc(In-place NLL Loss calculation.)doc"); +} + +} // namespace infinicore::ops \ No newline at end of file diff --git a/src/infinicore/pybind11/ops/select_scatter.hpp b/src/infinicore/pybind11/ops/select_scatter.hpp new file mode 100644 index 000000000..0d6aeb07e --- /dev/null +++ b/src/infinicore/pybind11/ops/select_scatter.hpp @@ -0,0 +1,18 @@ +#pragma once +#include "infinicore/ops/select_scatter.hpp" +#include + +namespace py = pybind11; +namespace infinicore::ops { + +inline void bind_select_scatter(py::module &m) { + + m.def("select_scatter", + &op::select_scatter, + py::arg("input"), + py::arg("src"), + py::arg("dim"), + py::arg("index")); +} + +} // namespace infinicore::ops \ No newline at end of file diff --git a/test/infinicore/ops/gcd.py b/test/infinicore/ops/gcd.py index 9484511f8..6ded9390f 100644 --- a/test/infinicore/ops/gcd.py +++ b/test/infinicore/ops/gcd.py @@ -92,9 +92,9 @@ def get_test_cases(self): def torch_operator(self, *args, **kwargs): return torch.gcd(*args, **kwargs) - # def infinicore_operator(self, *args, **kwargs): - # """InfiniCore implementation (operator not yet available).""" - 
# return infinicore.gcd(*args, **kwargs) + def infinicore_operator(self, *args, **kwargs): + """InfiniCore implementation.""" + return infinicore.gcd(*args, **kwargs) def main(): diff --git a/test/infinicore/ops/glu.py b/test/infinicore/ops/glu.py index f53f3097d..292c68b4a 100644 --- a/test/infinicore/ops/glu.py +++ b/test/infinicore/ops/glu.py @@ -76,9 +76,9 @@ def get_test_cases(self): def torch_operator(self, *args, **kwargs): return torch.nn.functional.glu(*args, **kwargs) - # def infinicore_operator(self, *args, **kwargs): - # """InfiniCore implementation (operator not yet available).""" - # return infinicore.nn.functional.glu(*args, **kwargs) + def infinicore_operator(self, *args, **kwargs): + """InfiniCore implementation.""" + return infinicore.nn.functional.glu(*args, **kwargs) def main(): diff --git a/test/infinicore/ops/gt.py b/test/infinicore/ops/gt.py index b4809493e..959737bbf 100644 --- a/test/infinicore/ops/gt.py +++ b/test/infinicore/ops/gt.py @@ -111,9 +111,9 @@ def get_test_cases(self): def torch_operator(self, *args, **kwargs): return torch.gt(*args, **kwargs) - # def infinicore_operator(self, *args, **kwargs): - # """InfiniCore implementation (operator not yet available).""" - # return infinicore.gt(*args, **kwargs) + def infinicore_operator(self, *args, **kwargs): + """InfiniCore implementation.""" + return infinicore.gt(*args, **kwargs) def main(): diff --git a/test/infinicore/ops/nll_loss.py b/test/infinicore/ops/nll_loss.py index 03291ae4b..9cb705ddc 100644 --- a/test/infinicore/ops/nll_loss.py +++ b/test/infinicore/ops/nll_loss.py @@ -87,9 +87,9 @@ def get_test_cases(self): def torch_operator(self, *args, **kwargs): return torch.nn.functional.nll_loss(*args, **kwargs) - # def infinicore_operator(self, *args, **kwargs): - # """InfiniCore implementation (operator not yet available).""" - # return infinicore.nn.functional.nll_loss(*args, **kwargs) + def
infinicore_operator(self, *args, **kwargs): + """InfiniCore implementation.""" + return infinicore.nn.functional.nll_loss(*args, **kwargs) def main(): diff --git a/test/infinicore/ops/select_scatter.py b/test/infinicore/ops/select_scatter.py index 12375ce15..5118b590c 100644 --- a/test/infinicore/ops/select_scatter.py +++ b/test/infinicore/ops/select_scatter.py @@ -91,9 +91,9 @@ def get_test_cases(self): def torch_operator(self, *args, **kwargs): return torch.select_scatter(*args, **kwargs) - # def infinicore_operator(self, *args, **kwargs): - # """InfiniCore implementation (operator not yet available).""" - # return infinicore.select_scatter(*args, **kwargs) + def infinicore_operator(self, *args, **kwargs): + """InfiniCore implementation.""" + return infinicore.select_scatter(*args, **kwargs) def main(): From c2500041788077e374cb911386766fd05e3d5d5b Mon Sep 17 00:00:00 2001 From: PPPoint <1024879159@qq.com> Date: Mon, 2 Feb 2026 14:43:16 +0800 Subject: [PATCH 2/3] format --- python/infinicore/__init__.py | 6 +++--- python/infinicore/nn/functional/__init__.py | 4 ++-- python/infinicore/nn/functional/glu.py | 1 + python/infinicore/nn/functional/nll_loss.py | 1 + python/infinicore/ops/gcd.py | 1 + python/infinicore/ops/gt.py | 1 + python/infinicore/ops/select_scatter.py | 1 + 7 files changed, 10 insertions(+), 5 deletions(-) diff --git a/python/infinicore/__init__.py b/python/infinicore/__init__.py index 16b64ded3..0ec5b148c 100644 --- a/python/infinicore/__init__.py +++ b/python/infinicore/__init__.py @@ -45,6 +45,8 @@ from infinicore.ops.add import add from infinicore.ops.add_rms_norm import add_rms_norm, add_rms_norm_ from infinicore.ops.attention import attention +from infinicore.ops.gcd import gcd +from infinicore.ops.gt import gt from infinicore.ops.matmul import matmul from infinicore.ops.mul import mul from infinicore.ops.narrow import narrow @@ -52,11 +54,9 @@ from
infinicore.ops.paged_attention_prefill import paged_attention_prefill from infinicore.ops.paged_caching import paged_caching from infinicore.ops.rearrange import rearrange +from infinicore.ops.select_scatter import select_scatter from infinicore.ops.squeeze import squeeze from infinicore.ops.unsqueeze import unsqueeze -from infinicore.ops.gcd import gcd -from infinicore.ops.gt import gt -from infinicore.ops.select_scatter import select_scatter from infinicore.tensor import ( Tensor, empty, diff --git a/python/infinicore/nn/functional/__init__.py b/python/infinicore/nn/functional/__init__.py index 18908ad1b..1c71f285b 100644 --- a/python/infinicore/nn/functional/__init__.py +++ b/python/infinicore/nn/functional/__init__.py @@ -1,13 +1,13 @@ from .causal_softmax import causal_softmax from .embedding import embedding +from .glu import glu from .linear import linear +from .nll_loss import nll_loss from .random_sample import random_sample from .rms_norm import rms_norm from .rope import RopeAlgo, rope from .silu import silu from .swiglu import swiglu -from .nll_loss import nll_loss -from .glu import glu __all__ = [ "causal_softmax", diff --git a/python/infinicore/nn/functional/glu.py b/python/infinicore/nn/functional/glu.py index e9126ad35..96580743c 100644 --- a/python/infinicore/nn/functional/glu.py +++ b/python/infinicore/nn/functional/glu.py @@ -2,6 +2,7 @@ from infinicore.lib import _infinicore from infinicore.tensor import Tensor + def glu(input: Tensor, dim: int = -1) -> Tensor: if infinicore.use_ntops and input.device.type in ("cuda", "musa"): diff --git a/python/infinicore/nn/functional/nll_loss.py b/python/infinicore/nn/functional/nll_loss.py index 4c731472b..97ea6d494 100644 --- a/python/infinicore/nn/functional/nll_loss.py +++ b/python/infinicore/nn/functional/nll_loss.py @@ -2,6 +2,7 @@ from infinicore.lib import _infinicore from infinicore.tensor import Tensor + def nll_loss( input: Tensor, target: Tensor, diff --git a/python/infinicore/ops/gcd.py 
b/python/infinicore/ops/gcd.py index defc96a52..aefb44e7d 100644 --- a/python/infinicore/ops/gcd.py +++ b/python/infinicore/ops/gcd.py @@ -2,6 +2,7 @@ from infinicore.lib import _infinicore from infinicore.tensor import Tensor + def gcd(input: Tensor, other: Tensor, *, out=None) -> Tensor: r"""Computes the element-wise greatest common divisor (GCD).""" diff --git a/python/infinicore/ops/gt.py b/python/infinicore/ops/gt.py index f347df279..a628e1709 100644 --- a/python/infinicore/ops/gt.py +++ b/python/infinicore/ops/gt.py @@ -2,6 +2,7 @@ from infinicore.lib import _infinicore from infinicore.tensor import Tensor + def gt(input: Tensor, other: Tensor | float, *, out: Tensor | None = None) -> Tensor: if infinicore.use_ntops and input.device.type in ("cuda", "musa"): return infinicore.ntops.torch.gt(input, other, out=out) diff --git a/python/infinicore/ops/select_scatter.py b/python/infinicore/ops/select_scatter.py index 31af019df..4a449eab3 100644 --- a/python/infinicore/ops/select_scatter.py +++ b/python/infinicore/ops/select_scatter.py @@ -2,6 +2,7 @@ from infinicore.lib import _infinicore from infinicore.tensor import Tensor + def select_scatter(input: Tensor, src: Tensor, dim: int, index: int) -> Tensor: if infinicore.use_ntops and input.device.type in ("cuda", "musa"): return infinicore.ntops.torch.select_scatter(input, src, dim, index) From d5e9c757d1a5a525b389966f805bd360741dba06 Mon Sep 17 00:00:00 2001 From: PPPoint <1024879159@qq.com> Date: Mon, 2 Feb 2026 14:50:50 +0800 Subject: [PATCH 3/3] format --- python/infinicore/nn/functional/glu.py | 3 +-- python/infinicore/nn/functional/nll_loss.py | 20 ++++++++------------ python/infinicore/ops/gcd.py | 6 +++--- python/infinicore/ops/gt.py | 4 ++-- python/infinicore/ops/select_scatter.py | 4 +++- 5 files changed, 17 insertions(+), 20 deletions(-) diff --git a/python/infinicore/nn/functional/glu.py b/python/infinicore/nn/functional/glu.py index 96580743c..08f40ee11 100644 --- 
a/python/infinicore/nn/functional/glu.py +++ b/python/infinicore/nn/functional/glu.py @@ -4,8 +4,7 @@ def glu(input: Tensor, dim: int = -1) -> Tensor: - if infinicore.use_ntops and input.device.type in ("cuda", "musa"): return infinicore.ntops.torch.glu(input, dim) - return Tensor(_infinicore.glu(input._underlying, dim)) \ No newline at end of file + return Tensor(_infinicore.glu(input._underlying, dim)) diff --git a/python/infinicore/nn/functional/nll_loss.py b/python/infinicore/nn/functional/nll_loss.py index 97ea6d494..0ba421adf 100644 --- a/python/infinicore/nn/functional/nll_loss.py +++ b/python/infinicore/nn/functional/nll_loss.py @@ -12,7 +12,6 @@ def nll_loss( *, out=None, ) -> Tensor: - if infinicore.use_ntops and input.device.type in ("cuda", "musa"): return infinicore.ntops.torch.nll_loss( input, target, weight=weight, ignore_index=ignore_index, reduction=reduction @@ -23,18 +22,15 @@ def nll_loss( if out is None: return Tensor( _infinicore.nll_loss( - input._underlying, - target._underlying, - weight_underlying, - ignore_index + input._underlying, target._underlying, weight_underlying, ignore_index ) ) - + _infinicore.nll_loss_( - input._underlying, - target._underlying, - weight_underlying, - out._underlying, - ignore_index + input._underlying, + target._underlying, + weight_underlying, + out._underlying, + ignore_index, ) - return out \ No newline at end of file + return out diff --git a/python/infinicore/ops/gcd.py b/python/infinicore/ops/gcd.py index aefb44e7d..dc9b54e3d 100644 --- a/python/infinicore/ops/gcd.py +++ b/python/infinicore/ops/gcd.py @@ -5,12 +5,12 @@ def gcd(input: Tensor, other: Tensor, *, out=None) -> Tensor: r"""Computes the element-wise greatest common divisor (GCD).""" - + if infinicore.use_ntops and input.device.type in ("cuda", "musa"): return infinicore.ntops.torch.gcd(input, other, out=out) if out is None: return Tensor(_infinicore.gcd(input._underlying, other._underlying)) - + _infinicore.gcd_(input._underlying, 
other._underlying, out._underlying) - return out \ No newline at end of file + return out diff --git a/python/infinicore/ops/gt.py b/python/infinicore/ops/gt.py index a628e1709..afe93645d 100644 --- a/python/infinicore/ops/gt.py +++ b/python/infinicore/ops/gt.py @@ -12,6 +12,6 @@ def gt(input: Tensor, other: Tensor | float, *, out: Tensor | None = None) -> Te if out is None: return Tensor(_infinicore.gt(input._underlying, other._underlying)) - + _infinicore.gt_(input._underlying, other._underlying, out._underlying) - return out \ No newline at end of file + return out diff --git a/python/infinicore/ops/select_scatter.py b/python/infinicore/ops/select_scatter.py index 4a449eab3..87b5b420a 100644 --- a/python/infinicore/ops/select_scatter.py +++ b/python/infinicore/ops/select_scatter.py @@ -7,4 +7,6 @@ def select_scatter(input: Tensor, src: Tensor, dim: int, index: int) -> Tensor: if infinicore.use_ntops and input.device.type in ("cuda", "musa"): return infinicore.ntops.torch.select_scatter(input, src, dim, index) - return Tensor(_infinicore.select_scatter(input._underlying, src._underlying, dim, index)) \ No newline at end of file + return Tensor( + _infinicore.select_scatter(input._underlying, src._underlying, dim, index) + )