From 34ca8fff9d4454474ae0abb4e06cf8267f7b49cf Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin <shoumikhin@meta.com>
Date: Thu, 19 Mar 2026 14:02:30 -0700
Subject: [PATCH] Add OpenVINO backend to pip wheel for Linux x86_64 and aarch64 (#18309)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:

Rewrite the OpenVINO backend runtime to use dlopen/dlsym with the
OpenVINO C API, eliminating the build-time dependency on the OpenVINO
SDK. This follows the QNN backend's proven pattern: the backend links
statically into _portable_lib.so with zero external library dependencies
and loads libopenvino_c.so at runtime via dlopen when the user has the
openvino pip package installed.

Key design decisions:
- Use OpenVINO C API (openvino/c/openvino.h) instead of C++ API to
  enable dlopen with dlsym function pointers. The C++ class-based API
  (ov::Core, ov::InferRequest) cannot be resolved via dlsym.
- Forward-declare OpenVINO C types in OpenvinoApi.h instead of
  including headers, so there is no build-time SDK dependency at all.
- Thread-safe lazy loading via std::call_once in ensure_loaded().
- OPENVINO_LIB_PATH env var for explicit library path override.
- Backend always registers via static initializer (zero cost); actual
  OpenVINO loading deferred to first is_available()/init() call.
- No changes to portable_lib.py needed — no RTLD_GLOBAL, no separate
  .so, no auditwheel workarounds. The backend is statically linked
  into _portable_lib.so like XNNPACK and has no NEEDED entries for
  any OpenVINO library.
- Enabled on all Linux architectures (x86_64 and aarch64) since the
  backend has no build-time dependency and the openvino pip package
  supports both. ExecuTorch builds wheels for both platforms.

Robustness:
- LOAD_SYM macro zeros ov_ struct on partial failure to prevent
  dangling function pointers.
- Bounds check on execute() args (ET_CHECK_OR_RETURN_ERROR).
- Allocator null check with proper cleanup on failure.
- Device string safely constructed from CompileSpec buffer with
  trailing null byte stripping.
- Tensor rank validated against 1024 upper bound.
- create_ov_tensor uses RAII (unique_ptr) for heap dims allocation.
- Detailed comments on OpenVINO shared_ptr tensor ownership and
  destroy() lifecycle constraints.

User experience:
  pip install executorch            # XNNPACK+QNN+OpenVINO registered
  pip install executorch[openvino]  # + openvino runtime, backend activates

Differential Revision: D97202714
---
 .ci/scripts/wheel/pre_build_script.sh         |   7 +
 .ci/scripts/wheel/test_linux.py               |  12 +
 .ci/scripts/wheel/test_linux_aarch64.py       |  12 +
 README-wheel.md                               |   6 +-
 backends/openvino/CMakeLists.txt              |  22 +-
 backends/openvino/README.md                   |  31 ++
 backends/openvino/runtime/OpenvinoApi.h       | 133 ++++++
 backends/openvino/runtime/OpenvinoBackend.cpp | 393 +++++++++++++-----
 backends/openvino/runtime/OpenvinoBackend.h   |  31 +-
 docs/source/build-run-openvino.md             |  30 +-
 pyproject.toml                                |   5 +
 setup.py                                      |  12 +
 tools/cmake/preset/pybind.cmake               |   1 +
 13 files changed, 561 insertions(+), 134 deletions(-)
 create mode 100644 backends/openvino/runtime/OpenvinoApi.h

diff --git a/.ci/scripts/wheel/pre_build_script.sh b/.ci/scripts/wheel/pre_build_script.sh
index bf48d3bc29f..ac0617f6248 100755
--- a/.ci/scripts/wheel/pre_build_script.sh
+++ b/.ci/scripts/wheel/pre_build_script.sh
@@ -62,3 +62,10 @@ if [[ "$(uname -s)" == "Linux" && "$(uname -m)" == "x86_64" ]]; then
   echo "QNN_SDK_ROOT=${QNN_SDK_ROOT}" >> "${GITHUB_ENV}"
   echo "QNN SDK downloaded to ${QNN_SDK_ROOT}"
 fi
+
+# Install OpenVINO Python package on Linux for wheel testing.
+# The backend itself has no build-time dependency (uses dlopen at runtime).
+if [[ "$(uname -s)" == "Linux" ]]; then
+  echo "Installing OpenVINO runtime for testing..."
+  pip install "openvino>=2025.1.0,<2026.0.0"
+fi
diff --git a/.ci/scripts/wheel/test_linux.py b/.ci/scripts/wheel/test_linux.py
index 7802c319c0f..dd999061154 100644
--- a/.ci/scripts/wheel/test_linux.py
+++ b/.ci/scripts/wheel/test_linux.py
@@ -24,6 +24,18 @@
         ), f"QnnBackend not found in registered backends: {registered}"
         print("✓ QnnBackend is registered")
 
+    # On all Linux architectures the wheel includes the OpenVINO backend.
+    if platform.system() == "Linux":
+        from executorch.extension.pybindings.portable_lib import (
+            _get_registered_backend_names,
+        )
+
+        registered = _get_registered_backend_names()
+        assert (
+            "OpenvinoBackend" in registered
+        ), f"OpenvinoBackend not found in registered backends: {registered}"
+        print("✓ OpenvinoBackend is registered")
+
     test_base.run_tests(
         model_tests=[
             test_base.ModelTest(
diff --git a/.ci/scripts/wheel/test_linux_aarch64.py b/.ci/scripts/wheel/test_linux_aarch64.py
index d514738c005..22bdfb08b71 100644
--- a/.ci/scripts/wheel/test_linux_aarch64.py
+++ b/.ci/scripts/wheel/test_linux_aarch64.py
@@ -12,6 +12,18 @@
     # coremltools does not support linux aarch64 yet and install from the source fails on runtime
     # https://github.com/apple/coremltools/issues/1254
     # https://github.com/apple/coremltools/issues/2195
+
+    from executorch.extension.pybindings.portable_lib import (
+        _get_registered_backend_names,
+    )
+
+    registered = _get_registered_backend_names()
+
+    assert (
+        "OpenvinoBackend" in registered
+    ), f"OpenvinoBackend not found in registered backends: {registered}"
+    print("✓ OpenvinoBackend is registered")
+
     test_base.run_tests(
         model_tests=[
             test_base.ModelTest(
diff --git a/README-wheel.md b/README-wheel.md
index a1e70a2daef..69def2c31e1 100644
--- a/README-wheel.md
+++ b/README-wheel.md
@@ -6,7 +6,7 @@ PyTorch programs.
 
 The `executorch` pip package is in beta.
 * Supported python versions: 3.10, 3.11, 3.12, 3.13
-* Compatible systems: Linux x86_64, macOS aarch64
+* Compatible systems: Linux x86_64, Linux aarch64, macOS aarch64
 
 The prebuilt `executorch.runtime` module included in this package provides a way
 to run ExecuTorch `.pte` files, with some restrictions:
@@ -14,6 +14,10 @@ to run ExecuTorch `.pte` files, with some restrictions:
 * Only the [XNNPACK backend delegate](docs/source/backends/xnnpack/xnnpack-overview.md) is linked into the prebuilt module.
 * \[macOS only] [Core ML](docs/source/backends/coreml/coreml-overview.md) and [MPS](docs/source/backends/mps/mps-overview.md) backend
   are also linked into the prebuilt module.
+* \[Linux x86_64] [QNN](docs/source/backends-qualcomm.md) backend is linked into the prebuilt module.
+* \[Linux] [OpenVINO](docs/source/build-run-openvino.md) backend is also linked into the
+  prebuilt module. OpenVINO requires the runtime to be installed separately:
+  `pip install executorch[openvino]`
 
 Please visit the [ExecuTorch website](https://pytorch.org/executorch) for
 tutorials and documentation. Here are some starting points:
diff --git a/backends/openvino/CMakeLists.txt b/backends/openvino/CMakeLists.txt
index 736ed6d8603..5b7a1349bf5 100644
--- a/backends/openvino/CMakeLists.txt
+++ b/backends/openvino/CMakeLists.txt
@@ -28,29 +28,27 @@ set(COMMON_INCLUDE_DIRS ${EXECUTORCH_ROOT}/..)
 # Include utility CMake scripts from ExecuteTorch
 include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
 
-# Find OpenVINO libraries
-find_package(OpenVINO REQUIRED)
+# The backend resolves OpenVINO C API symbols via dlopen/dlsym at runtime, so
+# there is no build-time dependency on the OpenVINO SDK.
 
 # Define OpenVINO backend as a static library
-add_library(openvino_backend STATIC .)
+add_library(openvino_backend STATIC)
 
 # Enable exceptions and RTTI for OpenVINO backend
 target_compile_options(openvino_backend PRIVATE -frtti -fexceptions)
 
-# Include Executorch directories
-target_include_directories(openvino_backend PRIVATE ${COMMON_INCLUDE_DIRS})
-
-# Link OpenVINO and ExecuteTorch core libraries
-target_link_libraries(
-  openvino_backend PRIVATE openvino::runtime executorch_core
-)
-
 # Add source files for OpenVINO backend
 target_sources(
   openvino_backend
   PRIVATE ${CMAKE_CURRENT_LIST_DIR}/runtime/OpenvinoBackend.cpp
 )
 
+# Include ExecuTorch directories
+target_include_directories(openvino_backend PRIVATE ${COMMON_INCLUDE_DIRS})
+
+# Link ExecuTorch core and dynamic loading libraries
+target_link_libraries(openvino_backend PRIVATE executorch_core ${CMAKE_DL_LIBS})
+
 executorch_target_link_options_shared_lib(openvino_backend)
 
 # Install OpenVINO backend library to the lib directory
@@ -58,6 +56,4 @@ install(
   TARGETS openvino_backend
   EXPORT ExecuTorchTargets
   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-  INCLUDES
-  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
 )
diff --git a/backends/openvino/README.md b/backends/openvino/README.md
index fcc8c32267c..e905224bec4 100644
--- a/backends/openvino/README.md
+++ b/backends/openvino/README.md
@@ -12,6 +12,36 @@ OpenVINO backend supports the following hardware:
 
 For more information on the supported hardware, please refer to [OpenVINO System Requirements](https://docs.openvino.ai/2025/about-openvino/release-notes-openvino/system-requirements.html) page.
 
+## Quick Start (pip wheel)
+
+On Linux, the OpenVINO backend is included in the ExecuTorch pip wheel. Install the OpenVINO runtime to activate it:
+
+```bash
+pip install executorch[openvino]
+```
+
+Set the library path so the backend can find the OpenVINO runtime:
+
+```bash
+export LD_LIBRARY_PATH="$(python3 -c "import openvino, os; print(os.path.join(os.path.dirname(openvino.__file__), 'libs'))")${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+```
+
+Or point to the library directly:
+
+```bash
+export OPENVINO_LIB_PATH=$(python3 -c "import openvino, os; print(os.path.join(os.path.dirname(openvino.__file__), 'libs', 'libopenvino_c.so'))")
+```
+
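+Verify the backend is registered (registration happens even without the OpenVINO runtime installed; the runtime itself is loaded lazily on first use):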
+
+```python
+from executorch.extension.pybindings.portable_lib import (
+    _get_registered_backend_names,
+)
+print(_get_registered_backend_names())
+# Should include 'OpenvinoBackend'
+```
+
 ## Directory Structure
 
 ```
@@ -24,6 +54,7 @@ executorch
 │           ├── __init__.py
 │           └── quantizer.py
 │       ├── runtime
+│           ├── OpenvinoApi.h
 │           ├── OpenvinoBackend.cpp
 │           └── OpenvinoBackend.h
 │       ├── scripts
diff --git a/backends/openvino/runtime/OpenvinoApi.h b/backends/openvino/runtime/OpenvinoApi.h
new file mode 100644
index 00000000000..90403e24b95
--- /dev/null
+++ b/backends/openvino/runtime/OpenvinoApi.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) Intel Corporation
+ *
+ * Licensed under the BSD License (the "License"); you may not use this file
+ * except in compliance with the License. See the license file found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <dlfcn.h>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace executorch {
+namespace backends {
+namespace openvino {
+
+// Forward declarations matching the OpenVINO C API opaque types.
+// Only pointer types are used so struct layout is irrelevant.
+typedef struct ov_core ov_core_t;
+typedef struct ov_compiled_model ov_compiled_model_t;
+typedef struct ov_infer_request ov_infer_request_t;
+typedef struct ov_tensor ov_tensor_t;
+
+// Value types reproduced from openvino/c/ov_shape.h and ov_core.h.
+// These are stable C ABI — pinned via version constraint in pyproject.toml.
+typedef struct {
+  int64_t rank;
+  int64_t* dims;
+} ov_shape_t;
+
+typedef struct {
+  char** devices;
+  size_t size;
+} ov_available_devices_t;
+
+// Intentionally partial — only OV_STATUS_OK is needed for success checks.
+// The full enum is defined in openvino/c/ov_common.h.
+typedef enum {
+  OV_STATUS_OK = 0,
+  OV_STATUS_GENERAL_ERROR = -1,
+} ov_status_e;
+
+// Values aligned with ov::element::Type_t (sequential enum).
+typedef enum {
+  OV_ELEMENT_UNDEFINED = 0,
+  OV_ELEMENT_BOOLEAN = 1,
+  OV_ELEMENT_BF16 = 2,
+  OV_ELEMENT_F16 = 3,
+  OV_ELEMENT_F32 = 4,
+  OV_ELEMENT_F64 = 5,
+  OV_ELEMENT_I4 = 6,
+  OV_ELEMENT_I8 = 7,
+  OV_ELEMENT_I16 = 8,
+  OV_ELEMENT_I32 = 9,
+  OV_ELEMENT_I64 = 10,
+  OV_ELEMENT_U1 = 11,
+  OV_ELEMENT_U2 = 12,
+  OV_ELEMENT_U3 = 13,
+  OV_ELEMENT_U4 = 14,
+  OV_ELEMENT_U6 = 15,
+  OV_ELEMENT_U8 = 16,
+} ov_element_type_e;
+
+// Function pointer types for each OpenVINO C API function we use.
+using ov_core_create_fn = ov_status_e (*)(ov_core_t**);
+using ov_core_free_fn = void (*)(ov_core_t*);
+using ov_core_get_available_devices_fn =
+    ov_status_e (*)(const ov_core_t*, ov_available_devices_t*);
+using ov_available_devices_free_fn = void (*)(ov_available_devices_t*);
+using ov_core_import_model_fn = ov_status_e (*)(
+    const ov_core_t*,
+    const char*,
+    size_t,
+    const char*,
+    ov_compiled_model_t**);
+using ov_compiled_model_create_infer_request_fn =
+    ov_status_e (*)(const ov_compiled_model_t*, ov_infer_request_t**);
+using ov_compiled_model_inputs_size_fn =
+    ov_status_e (*)(const ov_compiled_model_t*, size_t*);
+using ov_compiled_model_outputs_size_fn =
+    ov_status_e (*)(const ov_compiled_model_t*, size_t*);
+using ov_compiled_model_free_fn = void (*)(ov_compiled_model_t*);
+using ov_infer_request_set_input_tensor_by_index_fn =
+    ov_status_e (*)(ov_infer_request_t*, size_t, const ov_tensor_t*);
+using ov_infer_request_set_output_tensor_by_index_fn =
+    ov_status_e (*)(ov_infer_request_t*, size_t, const ov_tensor_t*);
+using ov_infer_request_infer_fn = ov_status_e (*)(ov_infer_request_t*);
+using ov_infer_request_free_fn = void (*)(ov_infer_request_t*);
+using ov_tensor_create_from_host_ptr_fn =
+    ov_status_e (*)(ov_element_type_e, ov_shape_t, void*, ov_tensor_t**);
+using ov_tensor_free_fn = void (*)(ov_tensor_t*);
+using ov_shape_create_fn =
+    ov_status_e (*)(int64_t, const int64_t*, ov_shape_t*);
+using ov_shape_free_fn = ov_status_e (*)(ov_shape_t*);
+
+struct DlCloser {
+  void operator()(void* handle) {
+    if (handle) {
+      dlclose(handle);
+    }
+  }
+};
+using DlHandle = std::unique_ptr<void, DlCloser>;
+
+struct OpenvinoFunctions {
+  ov_core_create_fn core_create = nullptr;
+  ov_core_free_fn core_free = nullptr;
+  ov_core_get_available_devices_fn core_get_available_devices = nullptr;
+  ov_available_devices_free_fn available_devices_free = nullptr;
+  ov_core_import_model_fn core_import_model = nullptr;
+  ov_compiled_model_create_infer_request_fn
+      compiled_model_create_infer_request = nullptr;
+  ov_compiled_model_inputs_size_fn compiled_model_inputs_size = nullptr;
+  ov_compiled_model_outputs_size_fn compiled_model_outputs_size = nullptr;
+  ov_compiled_model_free_fn compiled_model_free = nullptr;
+  ov_infer_request_set_input_tensor_by_index_fn
+      infer_request_set_input_tensor_by_index = nullptr;
+  ov_infer_request_set_output_tensor_by_index_fn
+      infer_request_set_output_tensor_by_index = nullptr;
+  ov_infer_request_infer_fn infer_request_infer = nullptr;
+  ov_infer_request_free_fn infer_request_free = nullptr;
+  ov_tensor_create_from_host_ptr_fn tensor_create_from_host_ptr = nullptr;
+  ov_tensor_free_fn tensor_free = nullptr;
+  ov_shape_create_fn shape_create = nullptr;
+  ov_shape_free_fn shape_free = nullptr;
+};
+
+} // namespace openvino
+} // namespace backends
+} // namespace executorch
diff --git a/backends/openvino/runtime/OpenvinoBackend.cpp b/backends/openvino/runtime/OpenvinoBackend.cpp
index bac006ce916..a09c8d9f1cb 100644
--- a/backends/openvino/runtime/OpenvinoBackend.cpp
+++ b/backends/openvino/runtime/OpenvinoBackend.cpp
@@ -5,16 +5,14 @@
  *  LICENSE file in the root directory of this source tree.
  */
 
+#include <cinttypes>
+#include <cstdlib>
 #include <cstring>
-#include <iostream>
-#include <memory>
-
-#include <openvino/openvino.hpp>
+#include <string>
 
 #include <executorch/runtime/backend/interface.h>
 #include <executorch/runtime/core/error.h>
 #include <executorch/runtime/core/evalue.h>
-#include <executorch/runtime/core/exec_aten/util/dim_order_util.h>
 #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
 
 #include "OpenvinoBackend.h"
@@ -23,68 +21,175 @@ namespace executorch {
 namespace backends {
 namespace openvino {
 
-OpenvinoBackend::OpenvinoBackend() {}
+namespace {
+
+constexpr const char* kDefaultLibName = "libopenvino_c.so";
 
-bool OpenvinoBackend::is_available() const {
-  try {
-    // Create an OpenVINO Core object to verify runtime availability
-    ov::Core core;
-
-    // Check if at least one device is available
-    auto devices = core.get_available_devices();
-    if (!devices.empty()) {
-      return true; // OpenVINO is available
+template <typename FuncPtr>
+FuncPtr load_symbol(void* handle, const char* name) {
+  void* sym = dlsym(handle, name);
+  if (!sym) {
+    ET_LOG(
+        Error, "OpenVINO: failed to resolve symbol '%s': %s", name, dlerror());
+  }
+  return reinterpret_cast<FuncPtr>(sym);
+}
+
+} // namespace
+
+// Loading is attempted exactly once via std::call_once.  If the first attempt
+// fails (e.g. library not on LD_LIBRARY_PATH), subsequent calls return false
+// without retrying.  Users must fix their environment and restart the process.
+bool OpenvinoBackend::ensure_loaded() const {
+  std::call_once(load_flag_, [this]() {
+    const char* lib_path = std::getenv("OPENVINO_LIB_PATH");
+    const char* effective_path = lib_path ? lib_path : kDefaultLibName;
+
+    void* handle = dlopen(effective_path, RTLD_NOW | RTLD_LOCAL);
+    if (!handle) {
+      ET_LOG(
+          Error,
+          "OpenVINO runtime not found (dlopen failed: %s). "
+          "Ensure 'libopenvino_c.so' is on your library search path "
+          "(set OPENVINO_LIB_PATH or LD_LIBRARY_PATH), or install with: "
+          "pip install \"openvino>=2025.1.0,<2026.0.0\"",
+          dlerror());
+      return;
     }
-  } catch (const std::exception& e) {
-    // Log the exception if OpenVINO runtime is not available
-    ET_LOG(Error, "OpenVINO is not available: %s", e.what());
-  } catch (...) {
-    // Handle any unexpected errors
+    lib_handle_.reset(handle);
+
+#define LOAD_SYM(field, symbol_name)                                  \
+  ov_.field = load_symbol<decltype(ov_.field)>(handle, #symbol_name); \
+  if (!ov_.field) {                                                   \
+    ov_ = OpenvinoFunctions{};                                        \
+    lib_handle_.reset();                                              \
+    return;                                                           \
+  }
+
+    LOAD_SYM(core_create, ov_core_create)
+    LOAD_SYM(core_free, ov_core_free)
+    LOAD_SYM(core_get_available_devices, ov_core_get_available_devices)
+    LOAD_SYM(available_devices_free, ov_available_devices_free)
+    LOAD_SYM(core_import_model, ov_core_import_model)
+    LOAD_SYM(
+        compiled_model_create_infer_request,
+        ov_compiled_model_create_infer_request)
+    LOAD_SYM(compiled_model_inputs_size, ov_compiled_model_inputs_size)
+    LOAD_SYM(compiled_model_outputs_size, ov_compiled_model_outputs_size)
+    LOAD_SYM(compiled_model_free, ov_compiled_model_free)
+    LOAD_SYM(
+        infer_request_set_input_tensor_by_index,
+        ov_infer_request_set_input_tensor_by_index)
+    LOAD_SYM(
+        infer_request_set_output_tensor_by_index,
+        ov_infer_request_set_output_tensor_by_index)
+    LOAD_SYM(infer_request_infer, ov_infer_request_infer)
+    LOAD_SYM(infer_request_free, ov_infer_request_free)
+    LOAD_SYM(tensor_create_from_host_ptr, ov_tensor_create_from_host_ptr)
+    LOAD_SYM(tensor_free, ov_tensor_free)
+    LOAD_SYM(shape_create, ov_shape_create)
+    LOAD_SYM(shape_free, ov_shape_free)
+
+#undef LOAD_SYM
+
+    loaded_ = true;
     ET_LOG(
-        Error, "OpenVINO availability check failed due to an unknown error.");
+        Info,
+        "OpenVINO: runtime loaded successfully from '%s'",
+        effective_path);
+  });
+  return loaded_;
+}
+
+bool OpenvinoBackend::is_available() const {
+  if (!ensure_loaded()) {
+    return false;
+  }
+
+  ov_core_t* core = nullptr;
+  ov_status_e status = ov_.core_create(&core);
+  if (status != OV_STATUS_OK || !core) {
+    return false;
   }
 
-  return false; // OpenVINO is not available
+  ov_available_devices_t devices = {};
+  status = ov_.core_get_available_devices(core, &devices);
+  bool available = (status == OV_STATUS_OK && devices.size > 0);
+
+  if (devices.devices) {
+    ov_.available_devices_free(&devices);
+  }
+  ov_.core_free(core);
+  return available;
 }
 
 exr::Result<exr::DelegateHandle*> OpenvinoBackend::init(
     exr::BackendInitContext& context,
     exr::FreeableBuffer* processed,
     exr::ArrayRef<exr::CompileSpec> compile_specs) const {
-  ET_LOG(Info, "OpenvinoBackend::init %p", processed->data());
+  if (!ensure_loaded()) {
+    return exr::Error::NotFound;
+  }
+
+  ov_core_t* core = nullptr;
+  ov_status_e status = ov_.core_create(&core);
+  if (status != OV_STATUS_OK || !core) {
+    ET_LOG(Error, "OpenVINO: failed to create core (status=%d)", status);
+    return exr::Error::Internal;
+  }
 
-  ov::Core core;
   const char* data_ptr = static_cast<const char*>(processed->data());
   size_t data_size = processed->size();
 
-  // Copy data to a string or vector
-  std::string data_string(data_ptr, data_size);
-
-  // Wrap the data in a stream
-  std::istringstream compiled_stream(data_string);
-
-  auto device = "CPU";
-  // Get the device value, if provided in compile sepcs
+  std::string device = "CPU";
   for (auto& compile_spec : compile_specs) {
-    if (std::strcmp(compile_spec.key, "device") == 0)
-      device = static_cast<char*>(compile_spec.value.buffer);
+    if (std::strcmp(compile_spec.key, "device") == 0) {
+      const char* buf = static_cast<const char*>(compile_spec.value.buffer);
+      size_t len = compile_spec.value.nbytes;
+      // Strip trailing null bytes that may be included in nbytes.
+      while (len > 0 && buf[len - 1] == '\0') {
+        --len;
+      }
+      device.assign(buf, len);
+    }
   }
 
-  // Import the model
-  auto compiled_model = core.import_model(compiled_stream, device);
+  ov_compiled_model_t* compiled_model = nullptr;
+  status = ov_.core_import_model(
+      core, data_ptr, data_size, device.c_str(), &compiled_model);
+  ov_.core_free(core);
+
+  if (status != OV_STATUS_OK || !compiled_model) {
+    ET_LOG(
+        Error,
+        "OpenVINO: failed to import model for device '%s' (status=%d)",
+        device.c_str(),
+        status);
+    return exr::Error::Internal;
+  }
 
-  // The processed data can be freed since the model is compiled
   processed->Free();
 
-  // Allocate an infer request
-  std::shared_ptr<ov::InferRequest> infer_request =
-      std::make_shared<ov::InferRequest>(compiled_model.create_infer_request());
+  ov_infer_request_t* infer_request = nullptr;
+  status =
+      ov_.compiled_model_create_infer_request(compiled_model, &infer_request);
+  if (status != OV_STATUS_OK || !infer_request) {
+    ET_LOG(
+        Error, "OpenVINO: failed to create infer request (status=%d)", status);
+    ov_.compiled_model_free(compiled_model);
+    return exr::Error::Internal;
+  }
 
-  // Allocate execution handle
   exr::MemoryAllocator* allocator = context.get_runtime_allocator();
   ExecutionHandle* handle = allocator->allocateInstance<ExecutionHandle>();
+  if (!handle) {
+    ET_LOG(Error, "OpenVINO: failed to allocate ExecutionHandle");
+    ov_.infer_request_free(infer_request);
+    ov_.compiled_model_free(compiled_model);
+    return exr::Error::MemoryAllocationFailed;
+  }
   new (handle) ExecutionHandle;
-  handle->compiled_model = std::make_shared<ov::CompiledModel>(compiled_model);
+  handle->compiled_model = compiled_model;
   handle->infer_request = infer_request;
 
   return handle;
@@ -94,111 +199,191 @@ exr::Error OpenvinoBackend::execute(
     exr::BackendExecutionContext& context,
     exr::DelegateHandle* input_handle,
     exr::Span<exr::EValue*> args) const {
-  ExecutionHandle* execution_handle = (ExecutionHandle*)input_handle;
-
-  auto infer_request = execution_handle->infer_request;
+  ExecutionHandle* execution_handle =
+      static_cast<ExecutionHandle*>(input_handle);
+
+  size_t num_inputs = 0;
+  size_t num_outputs = 0;
+  ov_status_e status = ov_.compiled_model_inputs_size(
+      execution_handle->compiled_model, &num_inputs);
+  if (status != OV_STATUS_OK) {
+    return exr::Error::Internal;
+  }
+  status = ov_.compiled_model_outputs_size(
+      execution_handle->compiled_model, &num_outputs);
+  if (status != OV_STATUS_OK) {
+    return exr::Error::Internal;
+  }
 
-  size_t num_inputs = infer_request->get_compiled_model().inputs().size();
-  size_t num_outputs = infer_request->get_compiled_model().outputs().size();
+  ET_CHECK_OR_RETURN_ERROR(
+      num_inputs + num_outputs == args.size(),
+      InvalidArgument,
+      "OpenVINO: expected %zu args (inputs=%zu + outputs=%zu), got %zu",
+      num_inputs + num_outputs,
+      num_inputs,
+      num_outputs,
+      args.size());
 
-  // Set inputs
   for (size_t i = 0; i < num_inputs; i++) {
-    auto input_tensor = args[i]->toTensor();
-    ov::Shape input_shape(
-        input_tensor.sizes().begin(), input_tensor.sizes().end());
-
-    // Convert input tensor to OpenVINO tensor
-    ov::element::Type ov_type =
-        convert_to_openvino_type(input_tensor.scalar_type());
-    ov::Tensor ov_input_tensor(
-        ov_type, input_shape, input_tensor.mutable_data_ptr());
-
-    infer_request->set_input_tensor(i, ov_input_tensor);
+    ov_tensor_t* tensor = nullptr;
 
     if (args[i]->isInt()) {
       int64_t* val = &(args[i]->payload.copyable_union.as_int);
-
-      // Create OpenVINO tensor from integer input
-      ov::Tensor ov_input_tensor(ov::element::i64, ov::Shape{1}, val);
-      infer_request->set_input_tensor(i, ov_input_tensor);
+      int64_t dims[] = {1};
+      ov_shape_t shape = {};
+      status = ov_.shape_create(1, dims, &shape);
+      if (status != OV_STATUS_OK) {
+        return exr::Error::Internal;
+      }
+      status =
+          ov_.tensor_create_from_host_ptr(OV_ELEMENT_I64, shape, val, &tensor);
+      ov_.shape_free(&shape);
+      if (status != OV_STATUS_OK || !tensor) {
+        return exr::Error::Internal;
+      }
     } else {
       auto input_tensor = args[i]->toTensor();
-      ov::Shape input_shape(
-          input_tensor.sizes().begin(), input_tensor.sizes().end());
-
-      // Convert input tensor to OpenVINO tensor
-      ov::element::Type ov_type =
-          convert_to_openvino_type(input_tensor.scalar_type());
-      ov::Tensor ov_input_tensor(
-          ov_type, input_shape, input_tensor.mutable_data_ptr());
+      auto result = create_ov_tensor(input_tensor);
+      if (!result.ok()) {
+        return result.error();
+      }
+      tensor = result.get();
+    }
 
-      infer_request->set_input_tensor(i, ov_input_tensor);
+    status = ov_.infer_request_set_input_tensor_by_index(
+        execution_handle->infer_request, i, tensor);
+    // Safe to free: the OpenVINO C API wraps ov::Tensor in a shared_ptr
+    // (see ov_tensor struct in openvino/src/bindings/c/src/common.h).
+    // set_input_tensor dereferences the shared_ptr and passes by value to the
+    // C++ InferRequest, which stores its own shared_ptr copy.  Freeing the
+    // C wrapper here only decrements the refcount; the tensor stays alive
+    // inside the infer_request until it is freed or overwritten.
+    ov_.tensor_free(tensor);
+    if (status != OV_STATUS_OK) {
+      return exr::Error::Internal;
     }
   }
 
-  // Set outputs
   for (size_t i = 0; i < num_outputs; i++) {
     auto output_tensor = args[num_inputs + i]->toTensor();
-    ov::Shape output_shape(
-        output_tensor.sizes().begin(), output_tensor.sizes().end());
-
-    // Convert input tensor to OpenVINO tensor
-    ov::element::Type ov_type =
-        convert_to_openvino_type(output_tensor.scalar_type());
-    ov::Tensor ov_output_tensor(
-        ov_type, output_shape, output_tensor.mutable_data_ptr());
-
-    infer_request->set_output_tensor(i, ov_output_tensor);
+    auto result = create_ov_tensor(output_tensor);
+    if (!result.ok()) {
+      return result.error();
+    }
+    ov_tensor_t* tensor = result.get();
+
+    status = ov_.infer_request_set_output_tensor_by_index(
+        execution_handle->infer_request, i, tensor);
+    // Safe to free: see shared_ptr ownership comment on input tensor above.
+    ov_.tensor_free(tensor);
+    if (status != OV_STATUS_OK) {
+      return exr::Error::Internal;
+    }
   }
 
-  // Execute the inference
-  infer_request->infer();
+  status = ov_.infer_request_infer(execution_handle->infer_request);
+  if (status != OV_STATUS_OK) {
+    ET_LOG(Error, "OpenVINO: inference failed (status=%d)", status);
+    return exr::Error::Internal;
+  }
 
   return exr::Error::Ok;
 }
 
+// Lifecycle note: destroy() is only called for handles returned by a
+// successful init(), which requires ensure_loaded() to have succeeded.
+// The function-pointer null checks below only guard against the case where
+// loading never succeeded (ov_ stays zeroed); they cannot detect a dlclose().
 void OpenvinoBackend::destroy(exr::DelegateHandle* handle) const {
   if (!handle) {
-    ET_LOG(Info, "Attempted to destroy a null handle.");
     return;
   }
 
-  // Cast the handle to the appropriate type
   ExecutionHandle* execution_handle = static_cast<ExecutionHandle*>(handle);
 
-  // Clean up resources
-  if (execution_handle->infer_request) {
-    execution_handle->infer_request.reset(); // Release the infer request
-    ET_LOG(Info, "Infer request successfully destroyed.");
+  if (execution_handle->infer_request && ov_.infer_request_free) {
+    ov_.infer_request_free(execution_handle->infer_request);
+    execution_handle->infer_request = nullptr;
   }
 
-  if (execution_handle->compiled_model) {
-    execution_handle->compiled_model.reset(); // Release the compiled model
-    ET_LOG(Info, "Compiled model successfully destroyed.");
+  if (execution_handle->compiled_model && ov_.compiled_model_free) {
+    ov_.compiled_model_free(execution_handle->compiled_model);
+    execution_handle->compiled_model = nullptr;
   }
+}
 
-  ET_LOG(Info, "Delegate handle destroyed successfully.");
+exr::Result<ov_tensor_t*> OpenvinoBackend::create_ov_tensor(
+    const exa::Tensor& tensor) const {
+  ov_element_type_e ov_type = convert_to_openvino_type(tensor.scalar_type());
+  if (ov_type == OV_ELEMENT_UNDEFINED) {
+    return exr::Error::NotSupported;
+  }
+  auto sizes = tensor.sizes();
+  int64_t rank = sizes.size();
+  ET_CHECK_OR_RETURN_ERROR(
+      rank >= 0 && rank <= 1024,
+      InvalidArgument,
+      "OpenVINO: unreasonable tensor rank %" PRId64,
+      rank);
+  // Stack buffer for common ranks; heap-allocate via unique_ptr for larger.
+  int64_t dims_buf[8];
+  std::unique_ptr<int64_t[]> dims_heap;
+  int64_t* dims = dims_buf;
+  if (rank > 8) {
+    dims_heap.reset(new int64_t[rank]);
+    dims = dims_heap.get();
+  }
+  for (int64_t d = 0; d < rank; d++) {
+    dims[d] = sizes[d];
+  }
+  // shape is zero-initialized; shape_free is only needed after a successful
+  // shape_create (the zero state is safe to skip).
+  ov_shape_t shape = {};
+  ov_status_e status = ov_.shape_create(rank, dims, &shape);
+  dims_heap.reset(); // Release heap dims (no-op if stack was used).
+  if (status != OV_STATUS_OK) {
+    return exr::Error::Internal;
+  }
+
+  ov_tensor_t* ov_tensor = nullptr;
+  status = ov_.tensor_create_from_host_ptr(
+      ov_type, shape, tensor.mutable_data_ptr(), &ov_tensor);
+  ov_.shape_free(&shape);
+  if (status != OV_STATUS_OK || !ov_tensor) {
+    return exr::Error::Internal;
+  }
+  return ov_tensor;
 }
 
-ov::element::Type OpenvinoBackend::convert_to_openvino_type(
+ov_element_type_e OpenvinoBackend::convert_to_openvino_type(
     exa::ScalarType scalar_type) const {
   switch (scalar_type) {
     case exa::ScalarType::Float:
-      return ov::element::f32;
+      return OV_ELEMENT_F32;
     case exa::ScalarType::Half:
-      return ov::element::f16;
+      return OV_ELEMENT_F16;
     case exa::ScalarType::Int:
-      return ov::element::i32;
+      return OV_ELEMENT_I32;
     case exa::ScalarType::Char:
-      return ov::element::i8;
+      return OV_ELEMENT_I8;
     case exa::ScalarType::Byte:
-      return ov::element::u8;
+      return OV_ELEMENT_U8;
     case exa::ScalarType::Long:
-      return ov::element::i64;
+      return OV_ELEMENT_I64;
     case exa::ScalarType::Bool:
-      return ov::element::boolean;
+      return OV_ELEMENT_BOOLEAN;
+    case exa::ScalarType::BFloat16:
+      return OV_ELEMENT_BF16;
+    case exa::ScalarType::Double:
+      return OV_ELEMENT_F64;
+    case exa::ScalarType::Short:
+      return OV_ELEMENT_I16;
     default:
-      throw std::runtime_error("Unsupported scalar type");
+      ET_LOG(
+          Error,
+          "OpenVINO: unsupported scalar type %d",
+          static_cast<int>(scalar_type));
+      return OV_ELEMENT_UNDEFINED;
   }
 }
 
diff --git a/backends/openvino/runtime/OpenvinoBackend.h b/backends/openvino/runtime/OpenvinoBackend.h
index d84e3ba1f86..b0e60a3e068 100644
--- a/backends/openvino/runtime/OpenvinoBackend.h
+++ b/backends/openvino/runtime/OpenvinoBackend.h
@@ -8,36 +8,34 @@
 #ifndef OPENVINO_BACKEND_H
 #define OPENVINO_BACKEND_H
 
-#include <openvino/openvino.hpp>
-#include <iostream>
 #include <memory>
+#include <mutex>
 
 #include <executorch/runtime/backend/interface.h>
 #include <executorch/runtime/core/error.h>
 #include <executorch/runtime/core/evalue.h>
-#include <executorch/runtime/core/exec_aten/util/dim_order_util.h>
 #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
 
+#include "OpenvinoApi.h"
+
 namespace exr = executorch::runtime;
 namespace exa = executorch::aten;
 
-using namespace std;
-
 namespace executorch {
 namespace backends {
 namespace openvino {
 
-typedef struct {
-  std::shared_ptr<ov::CompiledModel> compiled_model;
-  std::shared_ptr<ov::InferRequest> infer_request;
-} ExecutionHandle;
+struct ExecutionHandle {
+  ov_compiled_model_t* compiled_model = nullptr;
+  ov_infer_request_t* infer_request = nullptr;
+};
 
 class OpenvinoBackend final : public ::exr::BackendInterface {
  public:
-  OpenvinoBackend();
-  ~OpenvinoBackend() = default;
+  OpenvinoBackend() = default;
+  ~OpenvinoBackend() override = default;
 
-  virtual bool is_available() const override;
+  bool is_available() const override;
   exr::Result<exr::DelegateHandle*> init(
       exr::BackendInitContext& context,
       exr::FreeableBuffer* processed,
@@ -49,7 +47,14 @@ class OpenvinoBackend final : public ::exr::BackendInterface {
   void destroy(exr::DelegateHandle* handle) const override;
 
  private:
-  ov::element::Type convert_to_openvino_type(exa::ScalarType scalar_type) const;
+  bool ensure_loaded() const;
+  ov_element_type_e convert_to_openvino_type(exa::ScalarType scalar_type) const;
+  exr::Result<ov_tensor_t*> create_ov_tensor(const exa::Tensor& tensor) const;
+
+  mutable DlHandle lib_handle_;
+  mutable OpenvinoFunctions ov_;
+  mutable std::once_flag load_flag_;
+  mutable bool loaded_ = false;
 };
 
 } // namespace openvino
diff --git a/docs/source/build-run-openvino.md b/docs/source/build-run-openvino.md
index 9b4c48fee5a..9c4cba40ed0 100644
--- a/docs/source/build-run-openvino.md
+++ b/docs/source/build-run-openvino.md
@@ -35,10 +35,35 @@ For more information on the supported hardware, please refer to [OpenVINO System
 
 ## Instructions for Building OpenVINO Backend
 
+### Quick Start (pip wheel, Linux)
+
+The easiest way to use the OpenVINO backend is via the prebuilt pip wheel:
+
+```bash
+pip install executorch[openvino]
+```
+
+Set the library path so the backend can find the OpenVINO C runtime:
+
+```bash
+export LD_LIBRARY_PATH="$(python3 -c "import openvino, os; print(os.path.join(os.path.dirname(openvino.__file__), 'libs'))")${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+```
+
+See the [OpenVINO backend README](../../backends/openvino/README.md) for verification steps and troubleshooting.
+
+### Building from Source
+
+If you need a custom build (different platform, custom configuration, etc.), follow the instructions below.
+
 ### Prerequisites
 
-Before you begin, ensure you have openvino installed and configured on your system:
+Before you begin, ensure you have the OpenVINO runtime installed and configured on your system. For most users, we recommend installing it via pip:
+
+```bash
+pip install openvino
+```
 
+Alternatively, follow the official [OpenVINO installation guide](https://docs.openvino.ai/2025/get-started/install-openvino.html) for platform-specific options, or build OpenVINO from source:
 
 ```bash
 git clone https://github.com/openvinotoolkit/openvino.git
@@ -54,8 +79,7 @@ cmake --install build --prefix <your_preferred_install_location>
 cd <your_preferred_install_location>
 source setupvars.sh
 ```
-Note: The OpenVINO backend is not yet supported with the current OpenVINO release packages. It is recommended to build from source. The instructions for using OpenVINO release packages will be added soon.
-For more information about OpenVINO build, refer to the [OpenVINO Build Instructions](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build_linux.md).
+For more information about building OpenVINO from source, refer to the [OpenVINO Build Instructions](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build_linux.md).
 
 ### Setup
 
diff --git a/pyproject.toml b/pyproject.toml
index 27d503f9597..c5a5d9de874 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,6 +80,11 @@ dependencies=[
   "omegaconf>=2.3.0",
 ]
 
+[project.optional-dependencies]
+openvino = [
+  "openvino>=2025.1.0,<2026.0.0; platform_system == 'Linux'",
+]
+
 [project.urls]
 # The keys are arbitrary but will be visible on PyPI.
 Homepage = "https://pytorch.org/executorch/"
diff --git a/setup.py b/setup.py
index f05951012e3..ea959666820 100644
--- a/setup.py
+++ b/setup.py
@@ -717,6 +717,15 @@ def run(self):  # noqa C901
                 f"-DQNN_SDK_ROOT={qnn_sdk_root}",
             ]
 
+        # Enable OpenVINO backend on Linux. The backend uses dlopen at
+        # runtime so it has no build-time SDK dependency.
+        if sys.platform == "linux" and install_utils.is_cmake_option_on(
+            cmake_configuration_args,
+            "EXECUTORCH_BUILD_OPENVINO",
+            default=True,
+        ):
+            cmake_configuration_args += ["-DEXECUTORCH_BUILD_OPENVINO=ON"]
+
         with Buck2EnvironmentFixer():
             # Generate the cmake cache from scratch to ensure that the cache state
             # is predictable.
@@ -794,6 +803,9 @@ def run(self):  # noqa C901
             cmake_build_args += ["--target", "qnn_executorch_backend"]
             cmake_build_args += ["--target", "PyQnnManagerAdaptor"]
 
+        if cmake_cache.is_enabled("EXECUTORCH_BUILD_OPENVINO"):
+            cmake_build_args += ["--target", "openvino_backend"]
+
         # Set PYTHONPATH to the location of the pip package.
         os.environ["PYTHONPATH"] = (
             site.getsitepackages()[0] + ";" + os.environ.get("PYTHONPATH", "")
diff --git a/tools/cmake/preset/pybind.cmake b/tools/cmake/preset/pybind.cmake
index 699a7c50358..5ff17d6df4a 100644
--- a/tools/cmake/preset/pybind.cmake
+++ b/tools/cmake/preset/pybind.cmake
@@ -39,6 +39,7 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
   if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64|i.86)$")
     set_overridable_option(EXECUTORCH_BUILD_QNN OFF)
   endif()
+  set_overridable_option(EXECUTORCH_BUILD_OPENVINO ON) # no build-time SDK
 elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL
                                                "WIN32"
 )