-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: submit-prompt.sh
More file actions
executable file
·56 lines (46 loc) · 1.45 KB
/
submit-prompt.sh
File metadata and controls
executable file
·56 lines (46 loc) · 1.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/bin/bash
# ~/computecluster/submit-prompt.sh
#
# Submit a one-shot vLLM completion request as a SLURM batch job.
# Generates a job script that waits for the vLLM server on compute1:8000,
# sends the prompt to /v1/completions, and saves the JSON response.
#
# Usage: ./submit-prompt.sh "Your prompt here" [max_tokens]
set -euo pipefail

if [ -z "${1:-}" ]; then
  echo "Usage: ./submit-prompt.sh \"Your prompt here\" [max_tokens]" >&2
  exit 1
fi

PROMPT="$1"
MAX_TOKENS="${2:-100}"

# Validate max_tokens early — it is spliced into the JSON body below, so a
# non-numeric value would produce an invalid (or malicious) request.
case "$MAX_TOKENS" in
  ''|*[!0-9]*)
    echo "Error: max_tokens must be a positive integer (got: $MAX_TOKENS)" >&2
    exit 1
    ;;
esac

TIMESTAMP=$(date +%Y%m%d_%H%M%S)
JOB_NAME="vllm_prompt_${TIMESTAMP}"
JOB_DIR="$HOME/computecluster/jobs/prompts"
JOB_SCRIPT="${JOB_DIR}/${JOB_NAME}.sh"
OUTPUT_FILE="${JOB_DIR}/${JOB_NAME}_output.json"

mkdir -p "$JOB_DIR"

# Build the request body with jq so the prompt is fully JSON-escaped.
# The previous sed 's/"/\\"/g' only handled double quotes; backslashes,
# newlines, and control characters in the prompt produced invalid JSON.
# jq is already required by the generated job script, so this adds no
# new dependency.
PAYLOAD=$(jq -n \
  --arg model "meta-llama/Llama-2-7b-chat-hf" \
  --arg prompt "$PROMPT" \
  --argjson max_tokens "$MAX_TOKENS" \
  '{model: $model, prompt: $prompt, max_tokens: $max_tokens}')

# Shell-quote the payload once here so it survives verbatim inside the
# generated (unquoted-EOF, hence expanding) here-doc below.
PAYLOAD_QUOTED=$(printf '%q' "$PAYLOAD")

# Generate the SLURM batch script.
cat > "$JOB_SCRIPT" << EOF
#!/bin/bash
#SBATCH --output=${JOB_DIR}/${JOB_NAME}.out
#SBATCH --error=${JOB_DIR}/${JOB_NAME}.err
#SBATCH --job-name=${JOB_NAME}
#SBATCH --time=00:05:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --partition=compute
#SBATCH --account=NONE

# Wait for the vLLM server; the 5-minute SLURM time limit bounds this loop.
until curl -s http://compute1:8000/v1/models >/dev/null 2>&1; do
  echo "Waiting for vLLM server..."
  sleep 2
done

# Send the completion request and pretty-print the JSON response.
curl -s http://compute1:8000/v1/completions \\
  -H "Content-Type: application/json" \\
  -d ${PAYLOAD_QUOTED} \\
  | jq . > "${OUTPUT_FILE}"
echo "Saved to ${OUTPUT_FILE}"
EOF

chmod +x "$JOB_SCRIPT"

# --parsable makes sbatch print just the job id.
JOB_ID=$(sbatch --parsable "$JOB_SCRIPT")
echo "Job submitted: $JOB_ID"
echo "Job script: $JOB_SCRIPT"
echo "Output will be saved to: $OUTPUT_FILE"
echo "Monitor with: tail -f ${JOB_DIR}/${JOB_NAME}.out"