Skip to content

Commit 0506745

Browse files
Your Namedcslin
authored and committed
tc comprehension integration Ref. SINGA-482
1 parent 806dbe7 commit 0506745

9 files changed

Lines changed: 576 additions & 3 deletions

File tree

cmake/Dependencies.cmake

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,3 +149,43 @@ IF(USE_MKLDNN)
149149
INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIR})
150150
LIST(APPEND SINGA_LINKER_LIBS ${MKLDNN_LIBRARIES})
151151
ENDIF()
152+
153+
154+
### Tensor comprehensions
# Locations of the Tensor Comprehensions (TC) checkout and of the conda
# environment that provides Halide, LLVM and torch. Both are cache variables,
# so they can be overridden at configure time, e.g.
#   cmake -DTC_ROOT=/opt/TensorComprehensions -DTC_CONDA_PREFIX=/opt/conda/envs/tc ..
# The defaults reproduce the layout that used to be hard-coded here.
SET(TC_ROOT "/root/TensorComprehensions" CACHE PATH
    "Root of the TensorComprehensions source tree (containing its build/ directory)")
SET(TC_CONDA_PREFIX "/root/conda/envs/tc_build" CACHE PATH
    "Prefix of the conda environment providing Halide, LLVM and torch")
SET(TC_TORCH_LIB_DIR "${TC_CONDA_PREFIX}/lib/python3.6/site-packages/torch/lib")

INCLUDE_DIRECTORIES(${TC_ROOT})
INCLUDE_DIRECTORIES(${TC_ROOT}/tc/version)
INCLUDE_DIRECTORIES(${TC_ROOT}/build)
# polyhedral model required
INCLUDE_DIRECTORIES(${TC_ROOT}/isl_interface/include)
# dlpack
INCLUDE_DIRECTORIES(${TC_ROOT}/third-party/dlpack/include)
# islpp
INCLUDE_DIRECTORIES(${TC_ROOT}/third-party/islpp/include)
# gflags
INCLUDE_DIRECTORIES(${TC_ROOT}/build/third-party/googlelibraries/gflags/include)
# glog
INCLUDE_DIRECTORIES(${TC_ROOT}/build/third-party/googlelibraries/glog)
# Halide
INCLUDE_DIRECTORIES(${TC_CONDA_PREFIX}/include/Halide)
# llvm
INCLUDE_DIRECTORIES(${TC_CONDA_PREFIX}/include)
# torch ATen header
INCLUDE_DIRECTORIES(${TC_TORCH_LIB_DIR}/include)

# find Halide lib
SET(HALIDE_PREFIX "${TC_CONDA_PREFIX}")
FIND_LIBRARY(HALIDE_LIBRARIES NAMES Halide PATHS ${HALIDE_PREFIX} PATH_SUFFIXES lib lib64 NO_DEFAULT_PATH)
# The REQUIRED keyword of find_library is only understood by CMake >= 3.18,
# so the not-found case is checked explicitly to fail on every version.
IF(NOT HALIDE_LIBRARIES)
  MESSAGE(FATAL_ERROR "Halide library not found under ${HALIDE_PREFIX}")
ENDIF()
MESSAGE(STATUS "Found Halide.so file: ${HALIDE_LIBRARIES}")

# find tc lib
LINK_DIRECTORIES(${TC_ROOT}/build/tc/aten)
LINK_DIRECTORIES(${TC_ROOT}/build/tc/lang)
LINK_DIRECTORIES(${TC_ROOT}/build/tc/core)
LINK_DIRECTORIES(${TC_ROOT}/build/tc/autotuner)
LINK_DIRECTORIES(${TC_ROOT}/build/tc/proto)

# torch(aten)
LINK_DIRECTORIES(${TC_TORCH_LIB_DIR})

LIST(APPEND SINGA_LINKER_LIBS ${HALIDE_LIBRARIES} tc_aten tc_lang tc_core_cpu tc_cuda tc_core_cuda_no_sdk tc_core tc_autotuner tc_proto ATen)
### Tensor comprehensions

include/singa/core/tensor.h

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,18 @@
2323
#include <tuple>
2424
#include <memory>
2525

26+
// tc
27+
#include <dlpack/dlpack.h>
28+
#include <tc/core/tensor.h>
29+
#include <tc/utils/compiler_options.h>
30+
#include <tc/core/compiler.h>
31+
#include <tc/core/utils/time.h>
32+
#include <tc/core/cuda/cuda_backend.h>
33+
#include <tc/core/cuda/cuda_tc_executor.h>
34+
#include <tc/core/cpu/cpu_backend.h>
35+
#include <tc/core/cpu/cpu_tc_executor.h>
36+
// tc
37+
2638
#include "singa/core/common.h"
2739
#include "singa/core/device.h"
2840
#include "singa/proto/core.pb.h"
@@ -603,6 +615,83 @@ Tensor ConcatRows(const vector<Tensor> &in);
603615
Tensor ConcatenateColumns(const vector<Tensor> &in);
604616
/// Alias name for function ConcatenateColumns
605617
Tensor ConcatColumns(const vector<Tensor> &in);
618+
619+
620+
621+
622+
/// tc integration start
623+
DLManagedTensor *toDLPack(const Tensor &src);
624+
625+
inline std::vector<tc::DLTensorUPtr>
626+
makeDLTensors(const std::vector<Tensor> &tensors);
627+
628+
template <typename Backend>
629+
std::unique_ptr<typename Backend::ExecutorType>
630+
compileTC(const std::string &tc, const std::string &entryPoint,
631+
const std::vector<Tensor> &inputs,
632+
const typename Backend::MappingOptionsType &options,
633+
const tc::CompilerOptions &compilerOptions = tc::CompilerOptions());
634+
635+
std::vector<tc::DLTensorUPtr>
636+
inferOutputTensorInfo(const std::string &tc, const std::string &entryPoint,
637+
const std::vector<Tensor> &inputs);
638+
639+
std::vector<Tensor> prepareOutputs(const std::string &tc,
640+
const std::string &entryPoint,
641+
const std::vector<Tensor> &inputs);
642+
643+
template <typename Executor>
644+
void runTC(const Executor &executor, const std::vector<Tensor> &inputs,
645+
std::vector<Tensor> &outputs);
646+
647+
// makeDLConstTensors implementation
648+
inline std::vector<tc::DLConstTensorUPtr>
649+
makeDLConstTensors(const std::vector<Tensor> &tensors) {
650+
std::vector<tc::DLConstTensorUPtr> dlTensors;
651+
for (auto tensor : tensors) {
652+
auto dlMTensor = toDLPack(tensor);
653+
dlTensors.push_back(tc::makeDLConstTensor(&(dlMTensor->dl_tensor)));
654+
dlMTensor->deleter(dlMTensor);
655+
}
656+
return dlTensors;
657+
}
658+
659+
// makeDLTensors implementation
660+
inline std::vector<tc::DLTensorUPtr>
661+
makeDLTensors(const std::vector<Tensor> &tensors) {
662+
std::vector<tc::DLTensorUPtr> dlTensors;
663+
for (auto tensor : tensors) {
664+
auto dlMTensor = toDLPack(tensor);
665+
dlTensors.push_back(tc::makeDLTensor(&(dlMTensor->dl_tensor)));
666+
dlMTensor->deleter(dlMTensor);
667+
}
668+
return dlTensors;
669+
}
670+
671+
// compile implementation
672+
template <typename Backend>
673+
std::unique_ptr<typename Backend::ExecutorType>
674+
compileTC(const std::string &tc, const std::string &entryPoint,
675+
const std::vector<Tensor> &inputs,
676+
const typename Backend::MappingOptionsType &options,
677+
const tc::CompilerOptions &compilerOptions) {
678+
auto inputDLTensors = makeDLConstTensors(inputs);
679+
return tc::compile<Backend>(tc, entryPoint, extractRawPtrs(inputDLTensors),
680+
options, compilerOptions);
681+
}
682+
683+
// run implementation
684+
template <typename Executor>
685+
void runTC(const Executor &executor, const std::vector<Tensor> &inputs,
686+
std::vector<Tensor> &outputs) {
687+
auto inputDLTensors = makeDLConstTensors(inputs);
688+
auto outputDLTensors = makeDLTensors(outputs);
689+
return executor.run(extractRawPtrs(inputDLTensors),
690+
extractRawPtrs(outputDLTensors));
691+
}
692+
693+
/// tc integration end
694+
606695
} // namespace singa
607696

608697
#endif // SINGA_CORE_TENSOR_H_

src/core/tensor/tensor.cc

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,19 @@
2424
#include <utility>
2525
#include <algorithm>
2626

27+
// tc
28+
#include <tc/core/check.h>
29+
#include <tc/core/compiler.h>
30+
#include <tc/core/tc_executor.h>
31+
#include <tc/core/tensor.h>
32+
// tc
2733

2834
#define Noaxis 9999
2935

36+
// A `lang` namespace already exists in singa, so alias
37+
// TC's top-level `lang` namespace to avoid ambiguous references.
38+
namespace tclang = lang;
39+
3040
namespace singa {
3141

3242
Tensor::~Tensor() {
@@ -1334,4 +1344,109 @@ Tensor Reshape(const Tensor &in, const Shape &s) {
13341344
return out.Reshape(s);
13351345
}
13361346

1347+
1348+
/// tc integration start
1349+
struct SingaDLManagedTensor {
1350+
Tensor handle;
1351+
DLManagedTensor tensor;
1352+
};
1353+
1354+
void deleter(DLManagedTensor *arg) {
1355+
delete static_cast<SingaDLManagedTensor *>(arg->manager_ctx);
1356+
}
1357+
1358+
static DLDataType getDLDataType(const Tensor &t) {
1359+
DLDataType dtype;
1360+
dtype.lanes = 1;
1361+
dtype.bits = SizeOf(t.data_type()) * 8;
1362+
switch (t.data_type()) {
1363+
case kFloat32:
1364+
dtype.code = DLDataTypeCode::kDLFloat;
1365+
break;
1366+
default:
1367+
throw std::logic_error("only kFloat32 is supported for dlpack conversion");
1368+
break;
1369+
}
1370+
return dtype;
1371+
}
1372+
1373+
static DLContext getDLContext(const Tensor &tensor, const int64_t &device_id) {
1374+
DLContext ctx;
1375+
ctx.device_id = device_id;
1376+
if (tensor.device()->lang() == kCuda) {
1377+
ctx.device_type = DLDeviceType::kDLGPU;
1378+
} else {
1379+
ctx.device_type = DLDeviceType::kDLCPU;
1380+
}
1381+
return ctx;
1382+
}
1383+
1384+
// This function returns a shared_ptr to memory managed DLpack tensor
1385+
// constructed out of ATen tensor
1386+
DLManagedTensor *toDLPack(const Tensor &src) {
1387+
SingaDLManagedTensor *singaDLManagedTensor(new SingaDLManagedTensor);
1388+
singaDLManagedTensor->handle = src;
1389+
singaDLManagedTensor->tensor.manager_ctx = singaDLManagedTensor;
1390+
singaDLManagedTensor->tensor.deleter = &deleter;
1391+
singaDLManagedTensor->tensor.dl_tensor.data = src.block()->mutable_data();
1392+
int64_t device_id = src.device()->id();
1393+
singaDLManagedTensor->tensor.dl_tensor.ctx = getDLContext(src, device_id);
1394+
singaDLManagedTensor->tensor.dl_tensor.ndim = src.nDim();
1395+
singaDLManagedTensor->tensor.dl_tensor.dtype = getDLDataType(src);
1396+
1397+
auto shapeVec =
1398+
new std::vector<int64_t>(src.shape().begin(), src.shape().end());
1399+
singaDLManagedTensor->tensor.dl_tensor.shape = shapeVec->data();
1400+
1401+
auto strideVec =
1402+
new std::vector<int64_t>(src.stride().begin(), src.stride().end());
1403+
singaDLManagedTensor->tensor.dl_tensor.strides = strideVec->data();
1404+
1405+
singaDLManagedTensor->tensor.dl_tensor.byte_offset = 0;
1406+
return &(singaDLManagedTensor->tensor);
1407+
}
1408+
1409+
// prepare output
1410+
std::vector<tc::DLTensorUPtr>
1411+
inferOutputTensorInfo(const std::string &tc, const std::string &entryPoint,
1412+
const std::vector<Tensor> &inputs) {
1413+
auto parsedTcs = tc::detail::parse(tc);
1414+
if (parsedTcs.count(entryPoint) != 1u) {
1415+
TC_CHECK_GE(parsedTcs.size(), 1u)
1416+
<< "No TC was parsed, should have thrown earlier";
1417+
throw tclang::ErrorReport(parsedTcs.begin()->second)
1418+
<< "\nattempting to access undefined entryPoint: " << entryPoint;
1419+
}
1420+
auto inputDLTensors = makeDLConstTensors(inputs);
1421+
return makeDLTensorVector(tc::detail::inferOutputTensorInfo(
1422+
parsedTcs.at(entryPoint), extractRawPtrs(inputDLTensors)));
1423+
}
1424+
1425+
std::vector<Tensor> prepareOutputs(const std::string &tc,
1426+
const std::string &entryPoint,
1427+
const std::vector<Tensor> &inputs) {
1428+
std::vector<Tensor> outputs;
1429+
auto outTensorInfo = inferOutputTensorInfo(tc, entryPoint, inputs);
1430+
if (outTensorInfo.size() == 0) {
1431+
return outputs;
1432+
}
1433+
TC_CHECK_GE(inputs.size(), 1u)
1434+
<< "NYI: Need >= 1 input tensors to determine "
1435+
<< "backend and prepare ATen outputs. Add an overload with just an ATen "
1436+
<< "backend";
1437+
1438+
auto dev = inputs[0].device();
1439+
auto dtype = inputs[0].data_type();
1440+
for (size_t i = 0; i < outTensorInfo.size(); ++i) {
1441+
tc::TensorInfo info(outTensorInfo[i]);
1442+
Shape shape(info.shape.begin(), info.shape.end());
1443+
1444+
Tensor tmp(shape, dev, dtype);
1445+
outputs.push_back(tmp);
1446+
}
1447+
return outputs;
1448+
}
1449+
/// tc integration end
1450+
1451+
13371452
} // namespace singa

src/model/operation/tc_fn.cc

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*********************************************************
2+
*
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
*
20+
************************************************************/
21+
#include "./tc_fn.h"

#include <stdexcept>
22+
23+
namespace singa {
24+
25+
/// Stores the TC definition and entry-point name, then compiles the entry
/// point for the CUDA backend with naive mapping options.
///
/// @param tcDefinition TC source text.
/// @param entryFn      name of the TC function to compile.
/// @param inputs       tensors passed to compileTC() to describe the inputs.
TcFnHandle::TcFnHandle(std::string tcDefinition, std::string entryFn,
                       const std::vector<Tensor> &inputs)
    : tc_string(tcDefinition), tc_name(entryFn) {
  auto naiveOptions =
      tc::CudaBackend::MappingOptionsType::makeNaiveMappingOptions();
  pExecutor = singa::compileTC<tc::CudaBackend>(tc_string, tc_name, inputs,
                                                {naiveOptions});
}
32+
33+
/// Runs the compiled TC function held by `tcFnhandle` on `inputs` and returns
/// its first output tensor.
///
/// @throws std::runtime_error if the TC entry point yields no output tensor
///         (previously this case indexed an empty vector).
Tensor tcExecute(const TcFnHandle &tcFnhandle,
                 const std::vector<Tensor> &inputs) {
  auto outputs = singa::prepareOutputs(tcFnhandle.tc_string,
                                       tcFnhandle.tc_name, inputs);
  // prepareOutputs() returns an empty vector when nothing is inferred;
  // outputs[0] would then be undefined behaviour.
  if (outputs.empty()) {
    throw std::runtime_error("tcExecute: TC entry point '" +
                             tcFnhandle.tc_name +
                             "' produces no output tensor");
  }
  singa::runTC(*(tcFnhandle.pExecutor), inputs, outputs);
  return outputs[0];
}
39+
40+
}

src/model/operation/tc_fn.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*********************************************************
2+
*
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
*
20+
************************************************************/
21+
#ifndef SINGA_MODEL_OPERATION_TC_FN_H_
#define SINGA_MODEL_OPERATION_TC_FN_H_

#include <memory>
#include <string>
#include <vector>

#include "singa/core/tensor.h"

namespace singa {

/// Handle for one compiled Tensor Comprehensions (TC) function on the CUDA
/// backend.
class TcFnHandle {
 public:
  /// Compiles `entryFn` from the TC source `tcDefinition`; `inputs` are the
  /// tensors used to describe the inputs during compilation.
  TcFnHandle(std::string tcDefinition, std::string entryFn,
             const std::vector<Tensor> &inputs);
  // TC source text given to the constructor.
  std::string tc_string;
  // Name of the compiled entry point inside tc_string.
  std::string tc_name;
  // Executor produced by compiling the entry point.
  std::unique_ptr<typename tc::CudaBackend::ExecutorType> pExecutor;
};

/// Runs the function held by `tcFnhandle` on `inputs` and returns its first
/// output tensor.
Tensor tcExecute(const TcFnHandle &tcFnhandle,
                 const std::vector<Tensor> &inputs);

}  // namespace singa

#endif  // SINGA_MODEL_OPERATION_TC_FN_H_

test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ LIST(REMOVE_ITEM singa_test_source "singa/test_ep.cc")
3333
ADD_EXECUTABLE(test_singa "gtest/gtest_main.cc" ${singa_test_source})
3434
ADD_DEPENDENCIES(test_singa singa)
3535
#MESSAGE(STATUS "link libs" ${singa_linker_libs})
36-
TARGET_LINK_LIBRARIES(test_singa gtest singa )
36+
TARGET_LINK_LIBRARIES(test_singa gtest singa ${SINGA_LINKER_LIBS})
3737
IF(UNIX AND (NOT APPLE))
3838
LIST(APPEND LINK_FLAGS "-pthread")
3939
ENDIF()

0 commit comments

Comments
 (0)