Skip to content

Commit 4ec0d56

Browse files
SW publisher and Jenkins authored and committed
deepspeed-fork content for 1.16.1
Signed-off-by: SW publisher <sw_publisher@habana-labs.com>
1 parent ce78a63 commit 4ec0d56

212 files changed

Lines changed: 10357 additions & 1325 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/nv-nightly.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,6 @@ jobs:
3838
git rev-parse --short HEAD
3939
pip install .
4040
41-
- name: Install datasets
42-
run: |
43-
pip install datasets
44-
4541
- name: Install deepspeed
4642
run: |
4743
pip install .[dev,1bit,autotuning,inf]

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ repos:
3939
name: check-torchdist
4040
entry: ./scripts/check-torchdist.py
4141
language: python
42-
exclude: ^(deepspeed/comm/|docs/|benchmarks/|scripts/check-torchdist.py|deepspeed/moe/sharded_moe.py|deepspeed/runtime/comm/coalesced_collectives.py|deepspeed/elasticity/elastic_agent.py|deepspeed/launcher/launch.py|tests/unit/comm/test_dist.py)
42+
exclude: ^(deepspeed/comm/|docs/|benchmarks/|scripts/check-torchdist.py|deepspeed/moe/sharded_moe.py|deepspeed/runtime/comm/coalesced_collectives.py|deepspeed/elasticity/elastic_agent.py|deepspeed/launcher/launch.py|tests/unit/comm/test_dist.py|deepspeed/runtime/zero/utils.py|deepspeed/tools/pg_sim/ut/base.py|deepspeed/tools/pg_sim/pg.py)
4343
# Specific deepspeed/ files are excluded for now until we wrap ProcessGroup in deepspeed.comm
4444

4545
- repo: local

accelerator/abstract_accelerator.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,10 @@ def create_op_builder(self, class_name):
280280
def get_op_builder(self, class_name):
281281
...
282282

283+
@abc.abstractmethod
284+
def get_compile_backend(self):
285+
...
286+
283287
@abc.abstractmethod
284288
def build_extension(self):
285289
...

accelerator/cpu_accelerator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,3 +302,6 @@ def build_extension(self):
302302

303303
def export_envs(self):
304304
return []
305+
306+
def get_compile_backend(self):
307+
return "inductor"

accelerator/cuda_accelerator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,3 +360,6 @@ def build_extension(self):
360360

361361
def export_envs(self):
362362
return ['NCCL']
363+
364+
def get_compile_backend(self):
365+
return "inductor"

accelerator/hpu_accelerator.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,17 @@ def get_op_builder(self, class_name):
288288
else:
289289
return self.class_dict['NotImplementedBuilder'] if 'NotImplementedBuilder' in self.class_dict else None
290290

291+
def get_compile_backend(self):
292+
return "hpu_backend"
293+
294+
#shall be removed once moving to torch.compile
295+
def wrap_in_hpu_graph(self, module):
296+
if self.hpu.is_lazy():
297+
module = self.hpu.wrap_in_hpu_graph(module)
298+
else:
299+
print("Warning: hpu graphs in eager mode is not supported, ignoring")
300+
return module
301+
291302
def build_extension(self):
292303
from torch.utils.cpp_extension import BuildExtension
293304
return BuildExtension

accelerator/mps_accelerator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,3 +258,6 @@ def build_extension(self):
258258

259259
def export_envs(self):
260260
return []
261+
262+
def get_compile_backend(self):
263+
return "inductor"

accelerator/npu_accelerator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,3 +278,6 @@ def build_extension(self):
278278

279279
def export_envs(self):
280280
return ['ASCEND', 'HCCL', 'LD_LIBRARY', 'PATH']
281+
282+
def get_compile_backend(self):
283+
return "inductor"

build.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
+hpu.synapse.v1.16.1

csrc/adam/cpu_adam_impl.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,13 +244,17 @@ int ds_adam_step(int optimizer_id,
244244
opt->IncrementStep(step, beta1, beta2);
245245
opt->update_state(lr, epsilon, weight_decay, bias_correction);
246246

247+
bool bit16_precision = false;
248+
if ((params.options().dtype() == at::kHalf) || (params.options().dtype() == at::kBFloat16))
249+
bit16_precision = true;
250+
247251
opt->Step_8(params_ptr,
248252
grads_ptr,
249253
exp_avg_ptr,
250254
exp_avg_sq_ptr,
251255
params_c.numel(),
252256
nullptr,
253-
(params.options().dtype() == at::kHalf));
257+
bit16_precision);
254258

255259
#if defined(__ENABLE_CUDA__) or defined(__ENABLE_CANN__)
256260
opt->SynchronizeStreams();

0 commit comments

Comments (0)