@@ -538,8 +538,8 @@ std::map<std::string, std::shared_ptr<ov::Node>> GgmlOvDecoder::create_weight_no
 
 // Static cache for quantized weight nodes (keyed by tensor data pointer)
 // This is a fallback for when tensors don't have pre-built constants in extra
-static std::unordered_map<const void *, std::shared_ptr<ov::Node>> s_quantized_weight_cache;
-static std::mutex s_quantized_weight_cache_mutex;
+// static std::unordered_map<const void *, std::shared_ptr<ov::Node>> s_quantized_weight_cache;
+// static std::mutex s_quantized_weight_cache_mutex;
 
 std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor * tensor) {
     // Check if we have a pre-built constant from the OpenVINO backend buffer
@@ -571,14 +571,14 @@ std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor * tensor
 
     // Fallback: Check static cache for quantized weights (keyed by data pointer)
     // This handles cases where tensors weren't loaded through OpenVINO buffer
-    if (ggml_is_quantized(tensor->type)) {
-        std::lock_guard<std::mutex> lock(s_quantized_weight_cache_mutex);
-        auto it = s_quantized_weight_cache.find(tensor->data);
-        if (it != s_quantized_weight_cache.end()) {
-            GGML_LOG_DEBUG("%s: using cached quantized constant for %s\n", __func__, tensor->name);
-            return it->second;
-        }
-    }
+    // if (ggml_is_quantized(tensor->type)) {
+    //     std::lock_guard<std::mutex> lock(s_quantized_weight_cache_mutex);
+    //     auto it = s_quantized_weight_cache.find(tensor->data);
+    //     if (it != s_quantized_weight_cache.end()) {
+    //         GGML_LOG_DEBUG("%s: using cached quantized constant for %s\n", __func__, tensor->name);
+    //         return it->second;
+    //     }
+    // }
 
     GGML_LOG_DEBUG("%s: creating new constant for %s (extra=%p)\n", __func__, tensor->name, tensor->extra);
 
@@ -593,11 +593,11 @@ std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor * tensor
     result->set_friendly_name(tensor->name);
 
     // Cache the quantized weight node for future reuse
-    if (ggml_is_quantized(tensor->type)) {
-        std::lock_guard<std::mutex> lock(s_quantized_weight_cache_mutex);
-        s_quantized_weight_cache[tensor->data] = result;
-        GGML_LOG_DEBUG("%s: cached quantized constant for %s\n", __func__, tensor->name);
-    }
+    // if (ggml_is_quantized(tensor->type)) {
+    //     std::lock_guard<std::mutex> lock(s_quantized_weight_cache_mutex);
+    //     s_quantized_weight_cache[tensor->data] = result;
+    //     GGML_LOG_DEBUG("%s: cached quantized constant for %s\n", __func__, tensor->name);
+    // }
 
     return result;
 }
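
For reference, the construct being disabled in this diff is a plain pointer-keyed memoization guard. Below is a minimal standalone sketch of the same pattern, assuming the weight's data pointer remains valid and stable for the life of the process; Node, build_node, and get_or_create are placeholder names used for illustration only, not the project's actual API.

// Sketch: process-wide cache keyed by a data pointer, guarded by a mutex so
// concurrent callers reuse one node per buffer instead of rebuilding it.
#include <memory>
#include <mutex>
#include <unordered_map>

struct Node {};  // stand-in for ov::Node

// Expensive conversion we only want to run once per weight buffer (placeholder).
static std::shared_ptr<Node> build_node(const void * /*data*/) {
    return std::make_shared<Node>();
}

std::shared_ptr<Node> get_or_create(const void * data) {
    static std::unordered_map<const void *, std::shared_ptr<Node>> cache;
    static std::mutex cache_mutex;

    std::lock_guard<std::mutex> lock(cache_mutex);
    auto it = cache.find(data);
    if (it != cache.end()) {
        return it->second;   // reuse the node built on a previous call
    }
    auto node = build_node(data);
    cache[data] = node;      // remember it for subsequent lookups
    return node;
}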