@@ -212,9 +212,7 @@ def generate_constrained(input_ids: torch.Tensor) -> tuple[str, str]:
     # Forward pass 2: feed action tokens + suffix, pick emotion
     combined = action_token_ids[chosen_action] + action_suffix
     combined_tensor = torch.tensor([combined], dtype=torch.long, device=device)
-    outputs = model(
-        input_ids=combined_tensor, past_key_values=past, use_cache=True
-    )
+    outputs = model(input_ids=combined_tensor, past_key_values=past, use_cache=True)
 
     logits = outputs.logits[:, -1, :]
     mask = torch.full_like(logits, float("-inf"))
@@ -319,9 +317,7 @@ def predict(req: PredictRequest):
         emotion,
         latency,
     )
-    return PredictResponse(
-        action=action, emotion=emotion, latency_ms=round(latency, 1)
-    )
+    return PredictResponse(action=action, emotion=emotion, latency_ms=round(latency, 1))
 
 
 @app.post("/predict_batch", response_model=BatchPredictResponse)
@@ -363,7 +359,9 @@ def predict_batch(req: BatchPredictRequest):
     )
 
     total_latency = (time.perf_counter() - total_start) * 1000
-    logger.info("predict_batch | count=%d | total=%.0fms", len(req.texts), total_latency)
+    logger.info(
+        "predict_batch | count=%d | total=%.0fms", len(req.texts), total_latency
+    )
     return BatchPredictResponse(
         results=results, count=len(results), total_latency_ms=round(total_latency, 1)
     )