-
Notifications
You must be signed in to change notification settings - Fork 34
feat(aws): add --tool-call-parser and --chat-template flags for RHEL AI #848
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -46,6 +46,9 @@ type rhelAIRequest struct { | |
| hfToken *string | ||
| apiKey *string | ||
| autoStart bool | ||
| toolCallParser *string | ||
| chatTemplate *string | ||
| maxModelLen int | ||
| exposePorts []int | ||
| } | ||
|
|
||
|
|
@@ -85,6 +88,9 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiRHELAI.RHELAIArgs) (err error) { | |
| hfToken: &args.HFToken, | ||
| apiKey: &args.APIKey, | ||
| autoStart: args.AutoStart, | ||
| maxModelLen: args.MaxModelLen, | ||
| toolCallParser: &args.ToolCallParser, | ||
| chatTemplate: &args.ChatTemplate, | ||
| exposePorts: args.ExposePorts} | ||
| if args.Spot != nil { | ||
| r.spot = args.Spot.Spot | ||
|
|
@@ -373,6 +379,26 @@ func (r *rhelAIRequest) rhaiisSetupScript() string { | |
| ` && sudo sed -i 's|--model .*|--model %s \\|' %s/install.conf`, | ||
| *r.model, confDir) | ||
| } | ||
| script += fmt.Sprintf( | ||
| ` && GPU_COUNT=$(nvidia-smi -L 2>/dev/null | wc -l) && [ "$GPU_COUNT" -gt 0 ] && sudo sed -i "s|--tensor-parallel-size 1|--tensor-parallel-size $GPU_COUNT|" %s/install.conf`, | ||
| confDir) | ||
| maxModelLen := 4096 | ||
| if r.maxModelLen > 0 { | ||
| maxModelLen = r.maxModelLen | ||
| } | ||
| if len(*r.toolCallParser) > 0 { | ||
| toolArgs := fmt.Sprintf(`--enable-auto-tool-choice \\\n --tool-call-parser %s`, *r.toolCallParser) | ||
| if len(*r.chatTemplate) > 0 { | ||
| toolArgs += fmt.Sprintf(` \\\n --chat-template /opt/app-root/template/%s`, *r.chatTemplate) | ||
| } | ||
| script += fmt.Sprintf( | ||
| ` && sudo sed -i 's|--max-model-len.*|--max-model-len %d \\\n %s|' %s/install.conf`, | ||
| maxModelLen, toolArgs, confDir) | ||
| } else if r.maxModelLen > 0 { | ||
| script += fmt.Sprintf( | ||
| ` && sudo sed -i 's|--max-model-len.*|--max-model-len %d|' %s/install.conf`, | ||
| maxModelLen, confDir) | ||
| } | ||
|
Comment on lines
+389
to
+401
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🎯 Functional Correctness | 🟠 Major | ⚡ Quick win
Line 380 gates the entire config update on 🤖 Prompt for AI Agents |
||
| if len(*r.apiKey) > 0 { | ||
| script += fmt.Sprintf( | ||
| " && sudo sed -i '/\\[Install\\]/i Environment=VLLM_API_KEY=%s' %s/install.conf", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -155,6 +155,15 @@ spec: | |
| - name: expose-ports | ||
| description: Comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080). | ||
| default: "" | ||
| - name: tool-call-parser | ||
| description: Enable tool calling with the specified parser (e.g. llama3_json, hermes, mistral). Automatically adds --enable-auto-tool-choice. | ||
| default: "" | ||
| - name: chat-template | ||
| description: Chat template jinja filename (e.g. tool_chat_template_llama3.2_json.jinja). | ||
| default: "" | ||
| - name: max-model-len | ||
| description: Maximum model context length in tokens (default 4096). Increase for tool calling or larger models. | ||
| default: "0" | ||
|
|
||
| # Network params | ||
| - name: service-endpoints | ||
|
|
@@ -317,6 +326,15 @@ spec: | |
| if [[ "$(params.expose-ports)" != "" ]]; then | ||
| cmd+="--expose-ports '$(params.expose-ports)' " | ||
| fi | ||
| if [[ "$(params.tool-call-parser)" != "" ]]; then | ||
| cmd+="--tool-call-parser '$(params.tool-call-parser)' " | ||
| fi | ||
| if [[ "$(params.chat-template)" != "" ]]; then | ||
| cmd+="--chat-template '$(params.chat-template)' " | ||
| fi | ||
|
Comment on lines
+329
to
+334
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🔒 Security & Privacy | 🔴 Critical | ⚡ Quick win Do not append raw Tekton params into the These values flow into 🤖 Prompt for AI Agents |
||
| if [[ "$(params.max-model-len)" != "0" ]]; then | ||
| cmd+="--max-model-len '$(params.max-model-len)' " | ||
| fi | ||
| cmd+="--tags '$(params.tags)' " | ||
| fi | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -85,6 +85,12 @@ spec: | |||||||||||||||||||||||||||||||||||||
| - name: disk-size | ||||||||||||||||||||||||||||||||||||||
| description: Disk size in GB for the cloud instance | ||||||||||||||||||||||||||||||||||||||
| default: "200" | ||||||||||||||||||||||||||||||||||||||
| - name: gpus | ||||||||||||||||||||||||||||||||||||||
| description: Number of GPUs for the cloud instance (valid marketplace values are 1, 2, 4, 8) | ||||||||||||||||||||||||||||||||||||||
| default: "8" | ||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+88
to
+90
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🚀 Performance & Scalability | 🟠 Major | ⚡ Quick win Preserve the Azure RHEL AI GPU default unless callers opt in. With Proposed fix
- name: gpus
description: Number of GPUs for the cloud instance (valid marketplace values are 1, 2, 4, 8)
- default: "8"
+ default: ""
As per path instructions, Also applies to: 238-240 🤖 Prompt for AI Agents Source: Path instructions |
||||||||||||||||||||||||||||||||||||||
| - name: gpu-manufacturer | ||||||||||||||||||||||||||||||||||||||
| description: GPU manufacturer name for instance filtering (e.g. NVIDIA, AMD) | ||||||||||||||||||||||||||||||||||||||
| default: "" | ||||||||||||||||||||||||||||||||||||||
| - name: compute-sizes | ||||||||||||||||||||||||||||||||||||||
| description: Comma seperated list of sizes for the machines to be requested. If set this takes precedence over compute by args | ||||||||||||||||||||||||||||||||||||||
| default: "Standard_ND96is_MI300X_v5,Standard_ND96isr_MI300X_v5" | ||||||||||||||||||||||||||||||||||||||
|
|
@@ -229,6 +235,12 @@ spec: | |||||||||||||||||||||||||||||||||||||
| if [[ "$(params.compute-sizes)" != "" ]]; then | ||||||||||||||||||||||||||||||||||||||
| cmd+="--compute-sizes '$(params.compute-sizes)' " | ||||||||||||||||||||||||||||||||||||||
| fi | ||||||||||||||||||||||||||||||||||||||
| if [[ "$(params.gpus)" != "" ]]; then | ||||||||||||||||||||||||||||||||||||||
| cmd+="--gpus '$(params.gpus)' " | ||||||||||||||||||||||||||||||||||||||
| fi | ||||||||||||||||||||||||||||||||||||||
| if [[ "$(params.gpu-manufacturer)" != "" ]]; then | ||||||||||||||||||||||||||||||||||||||
| cmd+="--gpu-manufacturer '$(params.gpu-manufacturer)' " | ||||||||||||||||||||||||||||||||||||||
| fi | ||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+238
to
+243
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🔒 Security & Privacy | 🟠 Major | ⚡ Quick win Validate new params before adding them to the eval’d command. These Tekton params are interpolated into Proposed hardening
if [[ "$(params.gpus)" != "" ]]; then
- cmd+="--gpus '$(params.gpus)' "
+ case "$(params.gpus)" in
+ *[!0-9]*) echo "Parameter gpus must be numeric"; exit 1 ;;
+ esac
+ cmd+="--gpus $(params.gpus) "
fi
if [[ "$(params.gpu-manufacturer)" != "" ]]; then
+ case "$(params.gpu-manufacturer)" in
+ *[!A-Za-z0-9._-]*) echo "Parameter gpu-manufacturer contains unsupported characters"; exit 1 ;;
+ esac
cmd+="--gpu-manufacturer '$(params.gpu-manufacturer)' "
fi
As per path instructions, 📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents Source: Path instructions |
||||||||||||||||||||||||||||||||||||||
| if [[ "$(params.marketplace)" == "true" ]]; then | ||||||||||||||||||||||||||||||||||||||
| cmd+="--marketplace " | ||||||||||||||||||||||||||||||||||||||
| cmd+="--accelerator '$(params.accelerator)' " | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -155,6 +155,15 @@ spec: | |
| - name: expose-ports | ||
| description: Comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080). | ||
| default: "" | ||
| - name: tool-call-parser | ||
| description: Enable tool calling with the specified parser (e.g. llama3_json, hermes, mistral). Automatically adds --enable-auto-tool-choice. | ||
| default: "" | ||
| - name: chat-template | ||
| description: Chat template jinja filename (e.g. tool_chat_template_llama3.2_json.jinja). | ||
| default: "" | ||
| - name: max-model-len | ||
| description: Maximum model context length in tokens (default 4096). Increase for tool calling or larger models. | ||
| default: "0" | ||
|
|
||
| # Network params | ||
| - name: service-endpoints | ||
|
|
@@ -317,6 +326,15 @@ spec: | |
| if [[ "$(params.expose-ports)" != "" ]]; then | ||
| cmd+="--expose-ports '$(params.expose-ports)' " | ||
| fi | ||
| if [[ "$(params.tool-call-parser)" != "" ]]; then | ||
| cmd+="--tool-call-parser '$(params.tool-call-parser)' " | ||
| fi | ||
| if [[ "$(params.chat-template)" != "" ]]; then | ||
| cmd+="--chat-template '$(params.chat-template)' " | ||
| fi | ||
|
Comment on lines
+329
to
+334
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🔒 Security & Privacy | 🔴 Critical | ⚡ Quick win Do not append raw Tekton params into the These values flow into 🤖 Prompt for AI Agents |
||
| if [[ "$(params.max-model-len)" != "0" ]]; then | ||
| cmd+="--max-model-len '$(params.max-model-len)' " | ||
| fi | ||
| cmd+="--tags '$(params.tags)' " | ||
| fi | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.