@@ -91,21 +91,16 @@ jobs:
9191 sudo apt-get install -y openssl libssl-dev pkg-config python3-pip
9292 pip install csvtomd numpy scipy
9393
94- # Download the public Wikipedia-100K dataset via big-ann-benchmarks
94+ # Download pre-packaged Wikipedia-100K dataset from GitHub Release
9595 # Dataset: 100K Cohere Wikipedia embeddings (768-dim, float32, cosine distance)
96- # Source: https://github.com/harsha-simhadri/big-ann-benchmarks
97- - name : Clone big-ann-benchmarks
98- run : git clone --depth 1 https://github.com/harsha-simhadri/big-ann-benchmarks.git
99-
10096 - name : Download wikipedia-100K dataset
101- working-directory : big-ann-benchmarks
102- run : python create_dataset.py --dataset wikipedia-100K
103-
104- - name : Copy dataset to benchmark directories
97+ env :
98+ GH_TOKEN : ${{ github.token }}
10599 run : |
106100 mkdir -p diskann_rust/target/tmp baseline/target/tmp
107- cp -r big-ann-benchmarks/data/wikipedia_cohere diskann_rust/target/tmp/
108- cp -r big-ann-benchmarks/data/wikipedia_cohere baseline/target/tmp/
101+ gh release download benchmark-data-v1 --repo ${{ github.repository }} --pattern 'wikipedia-100K.tar.gz' --dir .
102+ tar xzf wikipedia-100K.tar.gz -C diskann_rust/target/tmp/
103+ cp -r diskann_rust/target/tmp/wikipedia_cohere baseline/target/tmp/
109104
110105 - name : Run baseline benchmark
111106 working-directory : baseline
@@ -214,21 +209,16 @@ jobs:
214209 sudo apt-get install -y openssl libssl-dev pkg-config python3-pip
215210 pip install csvtomd numpy scipy
216211
217- # Download the public OpenAI ArXiv 100K dataset via big-ann-benchmarks
212+ # Download pre-packaged OpenAI ArXiv 100K dataset from GitHub Release
218213 # Dataset: 100K OpenAI embeddings of ArXiv papers (1536-dim, float32, euclidean distance)
219- # Source: https://github.com/harsha-simhadri/big-ann-benchmarks
220- - name : Clone big-ann-benchmarks
221- run : git clone --depth 1 https://github.com/harsha-simhadri/big-ann-benchmarks.git
222-
223214 - name : Download openai-100K dataset
224- working-directory : big-ann-benchmarks
225- run : python create_dataset.py --dataset openai-100K
226-
227- - name : Copy dataset to benchmark directories
215+ env :
216+ GH_TOKEN : ${{ github.token }}
228217 run : |
229218 mkdir -p diskann_rust/target/tmp baseline/target/tmp
230- cp -r big-ann-benchmarks/data/OpenAIArXiv diskann_rust/target/tmp/
231- cp -r big-ann-benchmarks/data/OpenAIArXiv baseline/target/tmp/
219+ gh release download benchmark-data-v1 --repo ${{ github.repository }} --pattern 'openai-100K.tar.gz' --dir .
220+ tar xzf openai-100K.tar.gz -C diskann_rust/target/tmp/
221+ cp -r diskann_rust/target/tmp/OpenAIArXiv baseline/target/tmp/
232222
233223 - name : Run baseline benchmark
234224 working-directory : baseline