-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidate_sample_schematics.py
More file actions
executable file
·121 lines (98 loc) · 3.98 KB
/
validate_sample_schematics.py
File metadata and controls
executable file
·121 lines (98 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
"""
Script to validate a sample of schematic files in the minecraft-schematics-raw directory.
This checks that each file can be loaded without errors using our schematic_loader function.
"""
import os
import sys
import time
import random
from concurrent.futures import ProcessPoolExecutor, as_completed
import traceback
from schematic_loader import load_schematic_to_numpy
# Upper bound on how many schematic files are randomly sampled for validation.
# When fewer files than this are found, every file found is validated.
SAMPLE_SIZE = 500
def validate_schematic(file_path):
    """
    Attempt to load a schematic file and report whether it succeeded.

    Args:
        file_path (str): Path to the schematic file.

    Returns:
        tuple: A 3-tuple whose last element depends on the outcome:

        * ``(file_path, True, dimensions)`` when the file loads, where
          ``dimensions`` is the second value returned by
          ``load_schematic_to_numpy``.
        * ``(file_path, False, traceback_text)`` when loading raises, where
          ``traceback_text`` is the formatted traceback as a string.
    """
    try:
        # Only the dimensions are reported back; the block data is discarded.
        _, dimensions = load_schematic_to_numpy(file_path)
    except Exception:
        # Deliberately broad: this is the validation boundary, so any loader
        # failure is recorded as a failed file instead of propagating.
        return (file_path, False, traceback.format_exc())
    return (file_path, True, dimensions)
def _find_schematic_files(raw_dir):
    """Return the paths of every ``.schematic`` file found under *raw_dir*."""
    found = []
    for root, _, names in os.walk(raw_dir):
        found.extend(
            os.path.join(root, name)
            for name in names
            if name.endswith('.schematic')
        )
    return found


def _load_progress_bar():
    """Return ``tqdm``, installing it if missing, else a pass-through wrapper."""
    try:
        from tqdm import tqdm
        return tqdm
    except ImportError:
        pass

    import importlib
    import subprocess

    print("Installing tqdm for progress display...")
    # Run pip through the current interpreter so the package lands in the
    # environment this script is actually running in.
    subprocess.run([sys.executable, "-m", "pip", "install", "tqdm"], check=False)
    # Let the import system notice packages installed after startup.
    importlib.invalidate_caches()
    try:
        from tqdm import tqdm
        return tqdm
    except ImportError:
        print("Warning: tqdm is unavailable; continuing without a progress bar.")

    def _passthrough(iterable, **_kwargs):
        # Accepts and ignores tqdm keyword arguments such as total= and desc=.
        return iterable

    return _passthrough


def _write_error_report(report_path, failed_files, sample_size, total_files):
    """Write one section per failed file (path plus error text) to *report_path*."""
    # Explicit UTF-8 keeps the report independent of the platform default
    # encoding; backslashreplace keeps odd file names from aborting the write.
    with open(report_path, 'w', encoding='utf-8', errors='backslashreplace') as f:
        f.write("Schematic Validation Error Report\n")
        f.write(f"Sample size: {sample_size} out of {total_files} total files\n")
        f.write(f"Failed files: {len(failed_files)}\n\n")
        for file_path, error in failed_files:
            f.write(f"\n--- {file_path} ---\n")
            f.write(f"{error}\n")
            f.write("-" * 80 + "\n")


def main():
    """
    Validate a random sample of schematic files and print a summary.

    Walks the ``minecraft-schematics-raw`` directory, samples at most
    ``SAMPLE_SIZE`` ``.schematic`` files, loads each one in a worker process
    via ``validate_schematic`` and prints success/failure statistics. When
    any file fails, the details are written to ``error_report.txt``.

    Returns:
        int: 0 when every sampled file loaded, 1 otherwise.

    Raises:
        SystemExit: With code 1 when the directory is missing, or code 0 when
            it contains no ``.schematic`` files.
    """
    raw_dir = 'minecraft-schematics-raw'
    # isdir (not exists) so a regular file with this name is rejected too.
    if not os.path.isdir(raw_dir):
        print(f"Error: Directory '{raw_dir}' not found.")
        sys.exit(1)

    schematic_files = _find_schematic_files(raw_dir)
    total_files = len(schematic_files)
    if total_files == 0:
        print(f"No .schematic files found in '{raw_dir}'.")
        sys.exit(0)

    # Sample a subset of files
    sample_size = min(SAMPLE_SIZE, total_files)
    sampled_files = random.sample(schematic_files, sample_size)
    print(f"Found {total_files} schematic files. Validating a sample of {sample_size} files...")

    tqdm = _load_progress_bar()

    # perf_counter is monotonic, so the elapsed time is immune to clock changes.
    start_time = time.perf_counter()
    successful = 0
    failed_files = []

    # Use multiprocessing to speed up validation
    max_workers = min(os.cpu_count() or 4, 8)  # Limit to 8 workers max
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        future_to_file = {
            executor.submit(validate_schematic, path): path
            for path in sampled_files
        }
        for future in tqdm(as_completed(future_to_file), total=sample_size, desc="Validating"):
            try:
                file_path, success, result = future.result()
            except Exception as exc:
                # The worker itself died (for example a crashed process), so
                # no result tuple exists; record the failure and keep going
                # so the report is still produced.
                file_path = future_to_file[future]
                success = False
                result = f"Worker failed before returning a result: {exc!r}"
            if success:
                successful += 1
            else:
                failed_files.append((file_path, result))

    failed = len(failed_files)

    # Calculate statistics
    success_rate = (successful / sample_size) * 100
    elapsed_time = time.perf_counter() - start_time

    # Print results
    print("\n--- Validation Results ---")
    print(f"Sample size: {sample_size} out of {total_files} total files")
    print(f"Successfully loaded: {successful} ({success_rate:.2f}%)")
    print(f"Failed to load: {failed} ({100 - success_rate:.2f}%)")
    print(f"Time taken: {elapsed_time:.2f} seconds")

    # Write detailed error report if there were failures
    if failed:
        error_report = "error_report.txt"
        _write_error_report(error_report, failed_files, sample_size, total_files)
        print(f"\nDetailed error report written to {error_report}")

    # Return success code based on validation results
    return 0 if failed == 0 else 1
# Script entry point: run the validation and hand main()'s return value to the
# interpreter as the process exit status (0 = all loaded, 1 = failures).
if __name__ == "__main__":
    raise SystemExit(main())