Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,7 @@ git switch dev_backend_openvino

- **Windows:**
```bash
"C:\Program Files (x86)\Intel\openvino_2025.3.0\setupvars.bat"
"<OPENVINO_INSTALLATION_DIR>\setupvars.bat"
cmake -B build\ReleaseOV -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_OPENVINO=ON -DGGML_CPU_REPACK=OFF -DLLAMA_CURL=OFF -DCMAKE_TOOLCHAIN_FILE=C:\vcpkg\scripts\buildsystems\vcpkg.cmake
cmake --build build\ReleaseOV --parallel
```
Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-openvino/ggml-openvino.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ static enum ggml_status ggml_backend_openvino_buffer_init_tensor(ggml_backend_bu

// Put kvcache on device memory for GPU (NPU memory is too small even for kvcache)
if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY && strncmp(tensor->name, "cache_", 6) == 0 && !ctx->is_remote &&
ggml_openvino_get_device_name() == "GPU") {
ggml_openvino_get_device_name() == "GPU" && !getenv("GGML_OPENVINO_STATEFUL_EXECUTION")) {
GGML_ASSERT(ctx->tensor_extras.empty());
auto device = ctx->device;
auto size = ctx->size;
Expand Down Expand Up @@ -943,7 +943,7 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
}

static bool ggml_backend_openvino_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
return ggml_backend_buft_is_openvino(buft) || ggml_backend_buft_is_openvino_host(buft);
return ggml_backend_buft_is_openvino(buft) || ggml_backend_buft_is_host(buft);
GGML_UNUSED(dev);
}

Expand Down
12 changes: 5 additions & 7 deletions ggml/src/ggml-openvino/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -768,14 +768,12 @@ graph_key compute_graph_key(ggml_cgraph * cgraph) {
graph_key key;
key.n_nodes = cgraph->n_nodes;

if (cgraph->n_nodes > 0) {
key.first_node_name = std::string(cgraph->nodes[0]->name);
key.last_node_name = std::string(cgraph->nodes[cgraph->n_nodes - 1]->name);
} else {
key.first_node_name = "";
key.last_node_name = "";
for (int i = 0; i < cgraph->n_nodes; ++i) {
const auto * node = cgraph->nodes[i];
if (node->op == GGML_OP_SET_ROWS && strncmp(node->src[2]->name, "cache_k_l0", 10) == 0) {
key.cache_k_l0 = node->src[2];
}
}

return key;
}

Expand Down
9 changes: 3 additions & 6 deletions ggml/src/ggml-openvino/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,17 @@

struct graph_key {
size_t n_nodes;
std::string first_node_name;
std::string last_node_name;
void * cache_k_l0;

bool operator==(const graph_key & other) const {
return n_nodes == other.n_nodes && first_node_name == other.first_node_name &&
last_node_name == other.last_node_name;
return n_nodes == other.n_nodes && cache_k_l0 == other.cache_k_l0;
}
};

struct graph_key_hash {
size_t operator()(const graph_key & key) const {
size_t h = std::hash<size_t>{}(key.n_nodes);
h ^= std::hash<std::string>{}(key.first_node_name) + 0x9e3779b9 + (h << 6) + (h >> 2);
h ^= std::hash<std::string>{}(key.last_node_name) + 0x9e3779b9 + (h << 6) + (h >> 2);
h ^= std::hash<void *>{}(key.cache_k_l0) + 0x9e3779b9 + (h << 6) + (h >> 2);
return h;
}
};
Expand Down