-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patheval.sh
More file actions
66 lines (54 loc) · 2.04 KB
/
eval.sh
File metadata and controls
66 lines (54 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Wall-clock start in seconds since the epoch; the EXIT trap subtracts this
# from the end time to report the total runtime.
START_TIME_SEC="$(date +%s)"
#######################################
# Format a duration in whole seconds as zero-padded HH:MM:SS.
# Arguments: $1 - elapsed time in seconds (integer)
# Outputs:   the formatted duration on stdout, without a trailing newline
#######################################
format_elapsed() {
  local secs="$1"
  local hours=$((secs / 3600))
  local minutes=$(((secs % 3600) / 60))
  local seconds=$((secs % 60))
  printf '%02d:%02d:%02d' "$hours" "$minutes" "$seconds"
}
#######################################
# EXIT handler: print the total wall-clock runtime of the script.
# Runs on every exit path, including aborts triggered by `set -e`.
# Globals:   START_TIME_SEC (read)
# Outputs:   a blank line, a "Runtime" banner and the elapsed time on stdout
#######################################
report_runtime() {
  local finished_at elapsed
  finished_at=$(date +%s)
  elapsed=$((finished_at - START_TIME_SEC))
  echo ""
  echo "===== Runtime ====="
  echo "Total: $(format_elapsed "$elapsed") (${elapsed}s)"
}
trap report_runtime EXIT
# Absolute directory that contains this script. Tools are looked up and
# outputs are written relative to it, so the script does not depend on the
# caller's working directory for those paths.
# CDPATH is cleared for this one `cd`: with CDPATH set, `cd` on a relative
# directory may search other locations and echo the chosen directory to
# stdout, which would end up inside SCRIPT_DIR. `--` guards against a
# script path that starts with a dash.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"

# Input JSONL file(s) to evaluate. May be an array (one pipeline run per
# element) or a single string.
INPUT_JSONL=(
  "data/example/output.jsonl"
)

# Intermediate artifacts go to TMP_DIR; the metrics text file goes to FINAL_DIR.
TMP_DIR="${SCRIPT_DIR}/output/tmp"
FINAL_DIR="${SCRIPT_DIR}/output/final"
mkdir -p -- "$TMP_DIR" "$FINAL_DIR"
# Normalize INPUT_JSONL into the array INPUT_JSONL_LIST so the main loop can
# iterate uniformly: an indexed array is copied element by element, anything
# else is treated as a single scalar path.
case "$(declare -p INPUT_JSONL 2>/dev/null)" in
  *"declare -a"*)
    INPUT_JSONL_LIST=("${INPUT_JSONL[@]}")
    ;;
  *)
    INPUT_JSONL_LIST=("$INPUT_JSONL")
    ;;
esac
#######################################
# Pick the path to use for an input file.
# A path is returned unchanged unless it is relative, does not exist relative
# to the current working directory, and does exist relative to SCRIPT_DIR; in
# that case the SCRIPT_DIR-relative path is returned. This lets the script be
# launched from any directory, matching how tools and outputs are located,
# while keeping CWD-relative paths working exactly as before.
# Globals:   SCRIPT_DIR (read)
# Arguments: $1 - input path as configured
# Outputs:   the path to use, on stdout
#######################################
resolve_input_path() {
  local candidate="$1"
  if [[ -n "$candidate" && "$candidate" != /* && ! -e "$candidate" \
        && -e "${SCRIPT_DIR}/${candidate}" ]]; then
    candidate="${SCRIPT_DIR}/${candidate}"
  fi
  printf '%s\n' "$candidate"
}

#######################################
# Run the three pipeline steps (alignment, split, similarity) for one input.
# Output names are derived from the input's file name without its last
# extension plus a timestamp taken when the run starts.
# Globals:   SCRIPT_DIR, TMP_DIR, FINAL_DIR (read)
# Arguments: $1 - input JSONL path
# Outputs:   progress messages on stdout; intermediate files under TMP_DIR
#            and the metrics text file under FINAL_DIR are passed to the tools
# Returns:   non-zero (aborting the script under `set -e`) if any step fails
#######################################
run_eval_pipeline() {
  local input_path file_name stem timestamp base_name
  local out_alignment out_split out_sim out_txt

  # Declarations are kept separate from the command substitutions so a
  # failing command is not masked by `local`'s own exit status.
  input_path="$(resolve_input_path "$1")"
  file_name="$(basename -- "$input_path")"
  stem="${file_name%.*}"
  timestamp="$(date +%Y%m%d_%H%M%S)"
  base_name="${stem}_${timestamp}"

  out_alignment="${TMP_DIR}/${base_name}_with_alignment.jsonl"
  out_split="${TMP_DIR}/${base_name}_with_alignment_split.jsonl"
  out_sim="${TMP_DIR}/${base_name}_with_alignment_split_with_sim.jsonl"
  out_txt="${FINAL_DIR}/${base_name}.txt"

  echo "Input: $input_path"
  echo "Alignment output: $out_alignment"
  echo "Split output: $out_split"
  echo "Similarity output: $out_sim"
  echo ""

  echo "===== Step 1: Alignment ====="
  python "${SCRIPT_DIR}/tools/align.py" \
    --input_jsonl "$input_path" \
    --output_jsonl "$out_alignment"
  echo ""

  # Each step consumes the file produced by the previous one.
  echo "===== Step 2: Split ====="
  python "${SCRIPT_DIR}/tools/split.py" \
    --input_jsonl "$out_alignment" \
    --output_jsonl "$out_split"
  echo ""

  echo "===== Step 3: Similarity ====="
  python "${SCRIPT_DIR}/tools/run_similarity.py" \
    --input_jsonl "$out_split" \
    --output_jsonl "$out_sim" \
    --metrics_txt "$out_txt"
  echo ""

  echo "===== Done ====="
  echo "Final output: $out_sim"
  echo ""
}

# Process every configured input in order; the first failure stops the script.
for INPUT_JSONL_ITEM in "${INPUT_JSONL_LIST[@]}"; do
  run_eval_pipeline "$INPUT_JSONL_ITEM"
done