create_weight_node accept non-ov backend buffer

wine99 · wine99 · commit 1d4ec1b2ee4c · 2026-02-11T14:20:14.000+08:00
diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -551,13 +551,13 @@ std::map<std::string, std::shared_ptr<ov::Node>> GgmlOvDecoder::create_weight_no
 }
 
 std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor * tensor) {
+    const bool is_ov_buffer = ggml_backend_buffer_is_openvino(tensor->buffer);
+
     // Check if we have a pre-built constant from the OpenVINO backend buffer
     // This is set during ggml_backend_openvino_buffer_set_tensor
     if (tensor->extra) {
-        if (!ggml_backend_buffer_is_openvino(tensor->buffer)) {
-            OPENVINO_ASSERT(false, "Unsupported weight tensor: " + std::string(tensor->name) +
-                                       " Possibly this is a cpu backend repacked quantized weights");
-        }
+        OPENVINO_ASSERT(is_ov_buffer, "Unsupported weight tensor: " + std::string(tensor->name) +
+                                          " Possibly this is a cpu backend repacked quantized weights");
         // Cast to our extra base type and check the type
         auto * extra_base = static_cast<ggml_openvino_extra_base *>(tensor->extra);
 
@@ -578,12 +578,7 @@ std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor * tensor
         }
     }
 
-    // Fallback: tensor doesn't have a pre-built extra. The buffer type can only be
-    // openvino_host_buffer_type, which has enough space (get_alloc_size returns
-    // layout.total_size for quantized 2D tensors) to store extracted data in-place.
-    // Build the weight node and store it in tensor->extra for future reuse.
     GGML_LOG_DEBUG("%s: creating new weight node for %s\n", __func__, tensor->name);
-
     static const std::set<ggml_type> weight_types = {GGML_TYPE_F32,  GGML_TYPE_F16,  GGML_TYPE_BF16,
                                                      GGML_TYPE_Q8_0, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1,
                                                      GGML_TYPE_Q4_K, GGML_TYPE_Q5_K, GGML_TYPE_Q6_K};
@@ -594,21 +589,28 @@ std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor * tensor
 
     OvWeight ov_weight;
     if (ggml_is_quantized(tensor->type)) {
-        // For quantized weights, copy raw data to a temp buffer first because
-        // process_weight_tensor reads from data and writes extracted results
-        // (weights/scales/zp) to output_base_ptr — they would overlap if both
-        // point to tensor->data.
-        size_t raw_size = ggml_nbytes(tensor);
-        std::vector<uint8_t> tmp(raw_size);
-        memcpy(tmp.data(), tensor->data, raw_size);
-        ov_weight = process_weight_tensor(tensor, tmp.data(), tensor->data);
+        if (is_ov_buffer) {
+            // For quantized weights, copy raw data to a temp buffer first because
+            // process_weight_tensor reads from data and writes extracted results
+            // (weights/scales/zp) to output_base_ptr — they would overlap if both
+            // point to tensor->data.
+            size_t raw_size = ggml_nbytes(tensor);
+            std::vector<uint8_t> tmp(raw_size);
+            memcpy(tmp.data(), tensor->data, raw_size);
+            ov_weight = process_weight_tensor(tensor, tmp.data(), tensor->data);
+        } else {
+            ov_weight = process_weight_tensor(tensor, tensor->data, nullptr);
+        }
     } else {
         // For non-quantized weights (F16/F32/BF16), data is already in tensor->data.
         // process_weight_tensor will create an ov::Tensor wrapping tensor->data directly.
         ov_weight = process_weight_tensor(tensor, tensor->data, tensor->data);
     }
 
     ov_weight.weight_node->set_friendly_name(tensor->name);
+    if (!is_ov_buffer) {
+        return ov_weight.weight_node;
+    }
 
     ggml_openvino_extra_base * extra;
     if (ov_weight.is_quantized()) {