Skip to content

Commit cb51b03

Browse files
committed
Set scheduler v1 as default
1 parent 7a521bb commit cb51b03

4 files changed

Lines changed: 23 additions & 2 deletions

File tree

fastdeploy/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1292,7 +1292,7 @@ def check(self):
12921292
), "TP and EP cannot be enabled at the same time"
12931293

12941294
if not self.cache_config.enable_chunked_prefill:
1295-
if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
1295+
if not envs.ENABLE_V1_KVCACHE_SCHEDULER:
12961296
assert self.max_num_batched_tokens >= self.max_model_len, (
12971297
f"max_num_batched_tokens: {self.max_num_batched_tokens} "
12981298
f"should be larger than or equal to max_model_len: {self.max_model_len}"

fastdeploy/engine/args_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,12 @@ def __post_init__(self):
392392
raise NotImplementedError("Logprob does not support enable_expert_parallel.")
393393
if not current_platform.is_cuda():
394394
raise NotImplementedError("Only CUDA platform supports logprob.")
395+
if self.speculative_config is not None:
396+
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
397+
if self.splitwise_role != "mixed":
398+
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
399+
if (not current_platform.is_cuda()) and (not current_platform.is_xpu()):
400+
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
395401

396402
@staticmethod
397403
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:

fastdeploy/envs.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@
8181
# set trace exporter_otlp_headers.
8282
"EXPORTER_OTLP_HEADERS": lambda: os.getenv("EXPORTER_OTLP_HEADERS"),
8383
# enable kv cache block scheduler v1 (no need for kv_cache_ratio)
84-
"ENABLE_V1_KVCACHE_SCHEDULER": lambda: int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")),
84+
"ENABLE_V1_KVCACHE_SCHEDULER": lambda: int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "1")),
8585
# Whether to use PLUGINS.
8686
"FD_PLUGINS": lambda: None if "FD_PLUGINS" not in os.environ else os.environ["FD_PLUGINS"].split(","),
8787
# set trace attribute job_id.
@@ -105,5 +105,10 @@ def __getattr__(name: str):
105105
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
106106

107107

108+
def __setattr__(name: str, value: Any):
    """Override the value of a known environment variable at runtime.

    Stores *value* behind a zero-argument lambda so subsequent reads via
    the module's ``__getattr__`` (which calls the entry in
    ``environment_variables``) return the new value.

    Args:
        name: Name of an existing key in ``environment_variables``.
        value: New value to expose for that variable.

    Raises:
        AttributeError: If *name* is not a known environment variable.

    NOTE(review): PEP 562 only special-cases module-level ``__getattr__``
    and ``__dir__``; a plain ``envs.X = v`` assignment is not routed
    through this function automatically — confirm how callers invoke it.
    """
    # Raise instead of assert: assertions are stripped under ``python -O``,
    # which would let unknown names silently pollute the table.
    if name not in environment_variables:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    environment_variables[name] = lambda: value
111+
112+
108113
def __dir__():
    """Return the names of all environment variables this module exposes."""
    # Iterating the dict yields its keys directly; materialize as a list.
    return list(environment_variables)

fastdeploy/worker/worker_process.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,16 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
748748
logger.info(f"- Dynamic load weight: {load_config.dynamic_load_weight}")
749749
logger.info(f"- Load strategy: {load_config.load_strategy}")
750750

751+
if args.speculative_config is not None:
752+
logger.info("Set ENABLE_V1_KVCACHE_SCHEDULER to 0 due to not support speculative decoding now.")
753+
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
754+
if args.splitwise_role != "mixed":
755+
logger.info(f"Set ENABLE_V1_KVCACHE_SCHEDULER to 0 due to not supported {args.splitwise_role} now.")
756+
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
757+
if (not current_platform.is_cuda()) and (not current_platform.is_xpu()):
758+
logger.info("Set ENABLE_V1_KVCACHE_SCHEDULER to 0 due to not supported.")
759+
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
760+
751761
fd_config = FDConfig(
752762
model_config=model_config,
753763
parallel_config=parallel_config,

0 commit comments

Comments
 (0)