Skip to content

Commit 4dfeede

Browse files
Luke Iwanski authored and benoitsteiner committed
Fixes & Version bump (#33)
1 parent 870fa3a commit 4dfeede

11 files changed

Lines changed: 81 additions & 12 deletions

File tree

tensorflow/core/kernels/cast_op.cc

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -226,6 +226,8 @@ class SyclCastOp : public CastOpBase {
226226
work_ = GetSyclCastFromBool(dst_dtype_);
227227
} else if (src_dtype_ == DT_INT32) {
228228
work_ = GetSyclCastFromInt32(dst_dtype_);
229+
} else if (src_dtype_ == DT_INT64) {
230+
work_ = GetSyclCastFromInt64(dst_dtype_);
229231
} else if (src_dtype_ == DT_FLOAT) {
230232
work_ = GetSyclCastFromFloat(dst_dtype_);
231233
} else if (src_dtype_ == DT_DOUBLE) {
@@ -245,6 +247,7 @@ class SyclCastOp : public CastOpBase {
245247

246248
CURRY_TYPES2(REGISTER_CAST_SYCL, bool);
247249
CURRY_TYPES2(REGISTER_CAST_SYCL, int32);
250+
CURRY_TYPES2(REGISTER_CAST_SYCL, int64);
248251
CURRY_TYPES2(REGISTER_CAST_SYCL, float);
249252
CURRY_TYPES2(REGISTER_CAST_SYCL, double);
250253

tensorflow/core/kernels/cast_op_impl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,9 @@ GetSyclCastFromBool(DataType dst_dtype);
157157
std::function<void(OpKernelContext*, const Tensor&, Tensor*)>
158158
GetSyclCastFromInt32(DataType dst_dtype);
159159

160+
std::function<void(OpKernelContext*, const Tensor&, Tensor*)>
161+
GetSyclCastFromInt64(DataType dst_dtype);
162+
160163
std::function<void(OpKernelContext*, const Tensor&, Tensor*)>
161164
GetSyclCastFromFloat(DataType dst_dtype);
162165

tensorflow/core/kernels/cast_op_impl_int64.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ namespace tensorflow {
1919

2020
typedef Eigen::ThreadPoolDevice CPUDevice;
2121
typedef Eigen::GpuDevice GPUDevice;
22+
#ifdef TENSORFLOW_USE_SYCL
23+
typedef Eigen::SyclDevice SYCLDevice;
24+
#endif // TENSORFLOW_USE_SYCL
2225

2326
std::function<void(OpKernelContext*, const Tensor&, Tensor*)>
2427
GetCpuCastFromInt64(DataType dst_dtype) {
@@ -34,4 +37,13 @@ GetGpuCastFromInt64(DataType dst_dtype) {
3437
}
3538
#endif // GOOGLE_CUDA
3639

40+
#ifdef TENSORFLOW_USE_SYCL
41+
typedef Eigen::SyclDevice SYCLDevice;
42+
std::function<void(OpKernelContext*, const Tensor&, Tensor*)>
43+
GetSyclCastFromInt64(DataType dst_dtype) {
44+
CURRY_TYPES3(CAST_CASE, SYCLDevice, int64);
45+
return nullptr;
46+
}
47+
#endif // TENSORFLOW_USE_SYCL
48+
3749
} // namespace tensorflow

tensorflow/core/kernels/constant_op.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);
6060
REGISTER_SYCL_KERNEL(float);
6161
REGISTER_SYCL_KERNEL(double);
6262
REGISTER_SYCL_KERNEL(bool);
63+
REGISTER_SYCL_KERNEL(int64);
6364
#undef REGISTER_SYCL_KERNEL
6465
#endif
6566

tensorflow/core/kernels/relu_op.cc

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ namespace tensorflow {
2929

3030
typedef Eigen::ThreadPoolDevice CPUDevice;
3131
typedef Eigen::GpuDevice GPUDevice;
32+
#ifdef TENSORFLOW_USE_SYCL
33+
typedef Eigen::SyclDevice SYCLDevice;
34+
#endif // TENSORFLOW_USE_SYCL
3235

3336
#define REGISTER_RELU_KERNELS(type) \
3437
REGISTER_KERNEL_BUILDER( \
@@ -131,4 +134,30 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
131134

132135
#endif // GOOGLE_CUDA
133136

137+
#ifdef TENSORFLOW_USE_SYCL
138+
// Registration of the SYCL implementations.
139+
#define REGISTER_SYCL_KERNELS(type) \
140+
REGISTER_KERNEL_BUILDER( \
141+
Name("Relu").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
142+
ReluOp<SYCLDevice, type>); \
143+
REGISTER_KERNEL_BUILDER( \
144+
Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
145+
ReluGradOp<SYCLDevice, type>); \
146+
REGISTER_KERNEL_BUILDER( \
147+
Name("Relu6").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
148+
Relu6Op<SYCLDevice, type>); \
149+
REGISTER_KERNEL_BUILDER( \
150+
Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
151+
Relu6GradOp<SYCLDevice, type>); \
152+
REGISTER_KERNEL_BUILDER( \
153+
Name("Elu").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
154+
EluOp<SYCLDevice, type>); \
155+
REGISTER_KERNEL_BUILDER( \
156+
Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
157+
EluGradOp<SYCLDevice, type>)
158+
159+
REGISTER_SYCL_KERNELS(float);
160+
#undef REGISTER_SYCL_KERNELS
161+
#endif // TENSORFLOW_USE_SYCL
162+
134163
} // namespace tensorflow

tensorflow/core/kernels/relu_op.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,10 @@ void EluGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
175175

176176
} // namespace tensorflow
177177

178+
#ifdef TENSORFLOW_USE_SYCL
179+
#undef EIGEN_USE_SYCL
180+
#endif // TENSORFLOW_USE_SYCL
181+
178182
#undef EIGEN_USE_THREADS
179183

180184
#endif // TENSORFLOW_KERNELS_RELU_OP_H_

tensorflow/core/kernels/transpose_op.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,6 @@ Status TransposeSyclOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
225225
.HostMemory("perm"), \
226226
TransposeSyclOp);
227227
REGISTER(float);
228-
REGISTER(double);
229228
REGISTER(bool);
230229
REGISTER(int32);
231230
#undef REGISTER

tensorflow/core/kernels/xent_op.cc

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ namespace tensorflow {
2828

2929
typedef Eigen::ThreadPoolDevice CPUDevice;
3030
typedef Eigen::GpuDevice GPUDevice;
31+
#ifdef TENSORFLOW_USE_SYCL
32+
typedef Eigen::SyclDevice SYCLDevice;
33+
#endif // TENSORFLOW_USE_SYCL
3134

3235
template <typename Device, typename T>
3336
class SoftmaxXentWithLogitsOp : public OpKernel {
@@ -74,17 +77,25 @@ class SoftmaxXentWithLogitsOp : public OpKernel {
7477
// Device-generic base functor that forwards to the Eigen implementation
7578
// from XentEigenImpl; the per-device XentFunctor specializations derive from it.
7679
namespace functor {
77-
template <typename T>
78-
struct XentFunctor<CPUDevice, T> {
79-
void operator()(const CPUDevice& d, typename TTypes<T>::ConstMatrix logits,
80+
template <typename Device, typename T>
81+
struct XentFunctorBase {
82+
void operator()(const Device& d, typename TTypes<T>::ConstMatrix logits,
8083
typename TTypes<T>::ConstMatrix labels,
8184
typename TTypes<T>::Matrix scratch,
8285
typename TTypes<T>::Vec loss,
8386
typename TTypes<T>::Matrix backprop) {
84-
XentEigenImpl<CPUDevice, T>::Compute(d, logits, labels, scratch, loss,
87+
XentEigenImpl<Device, T>::Compute(d, logits, labels, scratch, loss,
8588
backprop);
8689
}
8790
};
91+
92+
template <typename T>
93+
struct XentFunctor<CPUDevice, T> : XentFunctorBase<CPUDevice, T> {};
94+
95+
#ifdef TENSORFLOW_USE_SYCL
96+
template <typename T>
97+
struct XentFunctor<SYCLDevice, T> : XentFunctorBase<SYCLDevice, T> {};
98+
#endif // TENSORFLOW_USE_SYCL
8899
} // namespace functor
89100

90101
#define REGISTER_CPU(T) \
@@ -111,4 +122,11 @@ REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits")
111122
SoftmaxXentWithLogitsOp<GPUDevice, double>);
112123
#endif // GOOGLE_CUDA
113124

125+
#ifdef TENSORFLOW_USE_SYCL
126+
REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits")
127+
.Device(DEVICE_SYCL)
128+
.TypeConstraint<float>("T"),
129+
SoftmaxXentWithLogitsOp<SYCLDevice, float>);
130+
#endif // TENSORFLOW_USE_SYCL
131+
114132
} // namespace tensorflow

tensorflow/g3doc/get_started/os_setup.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -727,7 +727,6 @@ and copy the files into e.g. `/usr/local/computecpp`:
727727

728728
```bash
729729
tar -xvzf ComputeCpp-CE-0.1.1-Ubuntu.14.04-64bit.tar.gz
730-
sudo mkdir /usr/local/computecpp
731730
sudo cp -R ComputeCpp-CE-0.1.1-Linux /usr/local/computecpp
732731
sudo chmod -R a+r /usr/local/computecpp/
733732
sudo chmod -R a+x /usr/local/computecpp/bin

tensorflow/workspace.bzl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,10 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
7474
name = "eigen_archive",
7575
urls = [
7676
#"http://bazel-mirror.storage.googleapis.com/bitbucket.org/eigen/eigen/get/60578b474802.tar.gz",
77-
"https://bitbucket.org/benoitsteiner/opencl/get/a7bedd616f70.tar.gz",
77+
"https://bitbucket.org/benoitsteiner/opencl/get/5c067614e3e1.tar.gz",
7878
],
7979
#sha256 = "7527cda827aff351981ebd910012e16be4d899c28a9ae7f143ae60e7f3f7b83d",
80-
strip_prefix = "benoitsteiner-opencl-a7bedd616f70",
80+
strip_prefix = "benoitsteiner-opencl-5c067614e3e1",
8181
build_file = str(Label("//third_party:eigen.BUILD")),
8282
)
8383

0 commit comments

Comments
 (0)