Skip to content

Commit 1d4ec1b

Browse files
committed
create_weight_node accepts non-OV backend buffers
1 parent 7b3b65b commit 1d4ec1b

1 file changed

Lines changed: 19 additions & 17 deletions

File tree

ggml/src/ggml-openvino/ggml-decoder.cpp

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -551,13 +551,13 @@ std::map<std::string, std::shared_ptr<ov::Node>> GgmlOvDecoder::create_weight_no
551551
}
552552

553553
std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor * tensor) {
554+
const bool is_ov_buffer = ggml_backend_buffer_is_openvino(tensor->buffer);
555+
554556
// Check if we have a pre-built constant from the OpenVINO backend buffer
555557
// This is set during ggml_backend_openvino_buffer_set_tensor
556558
if (tensor->extra) {
557-
if (!ggml_backend_buffer_is_openvino(tensor->buffer)) {
558-
OPENVINO_ASSERT(false, "Unsupported weight tensor: " + std::string(tensor->name) +
559-
" Possibly this is a cpu backend repacked quantized weights");
560-
}
559+
OPENVINO_ASSERT(is_ov_buffer, "Unsupported weight tensor: " + std::string(tensor->name) +
560+
" Possibly this is a cpu backend repacked quantized weights");
561561
// Cast to our extra base type and check the type
562562
auto * extra_base = static_cast<ggml_openvino_extra_base *>(tensor->extra);
563563

@@ -578,12 +578,7 @@ std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor * tensor
578578
}
579579
}
580580

581-
// Fallback: tensor doesn't have a pre-built extra. The buffer type can only be
582-
// openvino_host_buffer_type, which has enough space (get_alloc_size returns
583-
// layout.total_size for quantized 2D tensors) to store extracted data in-place.
584-
// Build the weight node and store it in tensor->extra for future reuse.
585581
GGML_LOG_DEBUG("%s: creating new weight node for %s\n", __func__, tensor->name);
586-
587582
static const std::set<ggml_type> weight_types = {GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16,
588583
GGML_TYPE_Q8_0, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1,
589584
GGML_TYPE_Q4_K, GGML_TYPE_Q5_K, GGML_TYPE_Q6_K};
@@ -594,21 +589,28 @@ std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor * tensor
594589

595590
OvWeight ov_weight;
596591
if (ggml_is_quantized(tensor->type)) {
597-
// For quantized weights, copy raw data to a temp buffer first because
598-
// process_weight_tensor reads from data and writes extracted results
599-
// (weights/scales/zp) to output_base_ptr — they would overlap if both
600-
// point to tensor->data.
601-
size_t raw_size = ggml_nbytes(tensor);
602-
std::vector<uint8_t> tmp(raw_size);
603-
memcpy(tmp.data(), tensor->data, raw_size);
604-
ov_weight = process_weight_tensor(tensor, tmp.data(), tensor->data);
592+
if (is_ov_buffer) {
593+
// For quantized weights, copy raw data to a temp buffer first because
594+
// process_weight_tensor reads from data and writes extracted results
595+
// (weights/scales/zp) to output_base_ptr — they would overlap if both
596+
// point to tensor->data.
597+
size_t raw_size = ggml_nbytes(tensor);
598+
std::vector<uint8_t> tmp(raw_size);
599+
memcpy(tmp.data(), tensor->data, raw_size);
600+
ov_weight = process_weight_tensor(tensor, tmp.data(), tensor->data);
601+
} else {
602+
ov_weight = process_weight_tensor(tensor, tensor->data, nullptr);
603+
}
605604
} else {
606605
// For non-quantized weights (F16/F32/BF16), data is already in tensor->data.
607606
// process_weight_tensor will create an ov::Tensor wrapping tensor->data directly.
608607
ov_weight = process_weight_tensor(tensor, tensor->data, tensor->data);
609608
}
610609

611610
ov_weight.weight_node->set_friendly_name(tensor->name);
611+
if (!is_ov_buffer) {
612+
return ov_weight.weight_node;
613+
}
612614

613615
ggml_openvino_extra_base * extra;
614616
if (ov_weight.is_quantized()) {

0 commit comments

Comments
 (0)