Skip to content

Commit 72a9449

Browse files
committed
Update lora def
1 parent: d0820e1 · commit: 72a9449

3 files changed

Lines changed: 20 additions & 34 deletions

File tree

backends/xnnpack/operators/node_visitor.py

Lines changed: 1 addition & 1 deletion
Original line | New line | Change
@@ -625,7 +625,7 @@ def get_serialized_buffer_index(
 625 625            f"Serializing constant data node {tensor} but tensor value has no bytes",
 626 626        )
 627 627        sha256_hash = hashlib.sha256(bytes(array))
 628     -     named_key = tensor.name + "_" + sha256_hash.hexdigest()
     628 +     named_key = sha256_hash.hexdigest()
 629 629
 630 630        size = const_val.untyped_storage().nbytes()
 631 631        xnn_graph.constant_data.append(

examples/models/llama/lora.py

Lines changed: 14 additions & 9 deletions
Original line | New line | Change
@@ -28,20 +28,25 @@ def __init__(
 28  28         self.use_bias = use_bias
 29  29         self.dropout = dropout
 30  30
 31      -      linear = nn.Linear(in_dim, out_dim, bias=use_bias)
 32      -      weight = linear.weight
 33      -      bias = linear.bias if self.use_bias else None
 34      -      self.register_parameter("weight", nn.Parameter(weight))
 35      -      self.register_parameter(
 36      -          "bias", nn.Parameter(bias) if bias is not None else None
 37      -      )
 38      -
     31  +     self.linear = nn.Linear(in_dim, out_dim, bias=use_bias)
 39  32         self.dropout = nn.Dropout(p=dropout) if dropout > 0.0 else nn.Identity()
 40  33         self.lora_a = nn.Linear(in_features=in_dim, out_features=rank, bias=False)
 41  34         self.lora_b = nn.Linear(in_features=rank, out_features=out_dim, bias=False)
 42  35
     36  +  @property
     37  +  def weight(self):
     38  +      return self.linear.weight
     39  +
     40  +  def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs):
     41  +      # Remap old-style "weight" key to "linear.weight" for backward compat
     42  +      old_key = prefix + "weight"
     43  +      new_key = prefix + "linear.weight"
     44  +      if old_key in state_dict and new_key not in state_dict:
     45  +          state_dict[new_key] = state_dict.pop(old_key)
     46  +      super()._load_from_state_dict(state_dict, prefix, *args, **kwargs)
     47  +
 43  48     def forward(self, x: torch.Tensor) -> torch.Tensor:
 44      -      out = torch.nn.functional.linear(x, self.weight, self.bias)
     49  +      out = self.linear(x)
 45  50         lora_out = self.lora_a(self.dropout(x))
 46  51         lora_out = (self.alpha / self.rank) * self.lora_b(lora_out)
 47  52
examples/models/llama/source_transformation/quantize.py

Lines changed: 5 additions & 24 deletions
Original line | New line | Change
@@ -144,30 +144,11 @@ def quantize(  # noqa C901
 144 144        from torchao.utils import unwrap_tensor_subclass
 145 145
 146 146        def filter_fn(m, fqn):
 147     -          # Check if it's a regular nn.Linear
 148     -          is_linear = isinstance(m, nn.Linear)
 149     -
 150     -          # Check if it's a LoRALinear (which has a base weight parameter to quantize)
 151     -          is_lora_linear = False
 152     -          try:
 153     -              from executorch.examples.models.llama.lora import LoRALinear
 154     -
 155     -              is_lora_linear = isinstance(m, LoRALinear)
 156     -          except ImportError:
 157     -              pass
 158     -
 159     -          # Check if the weight shape is compatible with group size
 160     -          has_shape_compatible_with_group_size = False
 161     -          if is_linear or is_lora_linear:
 162     -              if group_size == 0:
 163     -                  has_shape_compatible_with_group_size = True
 164     -              else:
 165     -                  has_shape_compatible_with_group_size = (
 166     -                      m.weight.shape[1] % group_size == 0
 167     -                  )
 168     -          return (
 169     -              is_linear or is_lora_linear
 170     -          ) and has_shape_compatible_with_group_size
     147 +          if not isinstance(m, nn.Linear):
     148 +              return False
     149 +          if group_size == 0:
     150 +              return True
     151 +          return m.weight.shape[1] % group_size == 0
 171 152
 172 153        weight_dtype = torch.int4 if qmode == "8da4w" else torch.int8
 173 154    quantize_(

0 commit comments

Comments (0)