Fresh start - excluded large ROM JSON files

This commit is contained in:
OpenClaw Agent
2026-04-11 09:45:12 -05:00
commit 5deb387aa6
395 changed files with 47744 additions and 0 deletions

111
tools/rom-full-scan.py Normal file
View File

@@ -0,0 +1,111 @@
import os
import hashlib
import json
from datetime import datetime
from pathlib import Path
# Drive root that holds the ROM collection to be scanned.
ROM_ROOT = 'R:\\'

# Directory that receives the JSON scan report.
OUTPUT_DIR = Path('C:/Users/admin/.openclaw/workspace/rom-inventory')

# Files larger than this many bytes (100 MiB) are not hashed.
HASH_LIMIT = 100 * 1024 ** 2
def get_file_hash(filepath, limit_bytes=None):
    """Return the hexadecimal MD5 digest of a file, or ``None`` on failure.

    The file is always read in fixed-size chunks, so memory use stays small
    even when ``limit_bytes`` is very large.

    Args:
        filepath: Path of the file to hash (anything ``open`` accepts).
        limit_bytes: When a positive integer, only the first ``limit_bytes``
            bytes are hashed. ``None``, ``0`` or a negative value hashes the
            whole file.

    Returns:
        The digest as a lowercase hex string, or ``None`` if the file could
        not be opened or read.

    Note:
        MD5 serves only as a content fingerprint here; it is not suitable
        for security purposes.
    """
    chunk_size = 64 * 1024
    hasher = hashlib.md5()
    # None, 0 and negative values all mean "no limit".
    remaining = limit_bytes if limit_bytes and limit_bytes > 0 else None
    try:
        with open(filepath, 'rb') as f:
            while remaining is None or remaining > 0:
                want = chunk_size if remaining is None else min(chunk_size, remaining)
                chunk = f.read(want)
                if not chunk:
                    # End of file reached before the limit (or no limit set).
                    break
                hasher.update(chunk)
                if remaining is not None:
                    remaining -= len(chunk)
    except (OSError, ValueError):
        # Unreadable, missing, or malformed path: callers treat None as
        # "could not hash" and carry on.
        return None
    return hasher.hexdigest()
def _iter_subdirs(parent, stats):
    """Yield the immediate subdirectories of *parent* in ``iterdir`` order.

    A directory that cannot be listed, or an entry that cannot be inspected,
    is counted in ``stats['errors']`` instead of aborting the scan.
    """
    try:
        entries = list(parent.iterdir())
    except OSError:
        stats['errors'] += 1
        return
    for entry in entries:
        try:
            is_directory = entry.is_dir()
        except OSError:
            stats['errors'] += 1
            continue
        if is_directory:
            yield entry


def scan_roms():
    """Scan the organized ROM tree, hash files, and write a JSON report.

    Walks ``ROM_ROOT/Rom Sets (Organized)/<manufacturer>/<system>/...``,
    recording every file's path, name, size, system and manufacturer. Files
    no larger than ``HASH_LIMIT`` are hashed with ``get_file_hash`` and
    grouped by digest to find duplicates; larger files are counted as
    skipped. A summary is printed and the report is written to
    ``OUTPUT_DIR / 'rom-full-scan.json'``.

    Files that cannot be stat'ed or hashed, and directories that cannot be
    listed, are counted in ``stats['errors']`` and skipped so that one bad
    entry does not abort the scan.

    Returns:
        The ``stats`` dictionary (totals plus per-system and
        per-manufacturer file counts and sizes).
    """
    print(f'Starting full ROM scan at {datetime.now()}')
    print(f'Root: {ROM_ROOT}')
    all_files = []
    hash_map = {}
    stats = {
        'total_files': 0,
        'total_size': 0,
        'hashed_files': 0,
        'skipped_large': 0,
        'errors': 0,
        'by_system': {},
        'by_manufacturer': {},
    }
    organized_path = Path(ROM_ROOT) / 'Rom Sets (Organized)'
    if not organized_path.exists():
        # Make an empty result explainable instead of looking like success.
        print(f'Organized ROM directory not found: {organized_path}')
    else:
        for manufacturer_dir in _iter_subdirs(organized_path, stats):
            manufacturer = manufacturer_dir.name
            manufacturer_stats = {'files': 0, 'size': 0}
            stats['by_manufacturer'][manufacturer] = manufacturer_stats
            for system_dir in _iter_subdirs(manufacturer_dir, stats):
                system = system_dir.name
                # The same system name may appear under several
                # manufacturers; its totals are accumulated together.
                system_stats = stats['by_system'].setdefault(
                    system, {'files': 0, 'size': 0}
                )
                for root, _dirs, files in os.walk(system_dir):
                    for name in files:
                        filepath = Path(root) / name
                        try:
                            size = filepath.stat().st_size
                        except OSError:
                            stats['errors'] += 1
                            continue
                        record = {
                            'path': str(filepath),
                            'name': name,
                            'size': size,
                            'system': system,
                            'manufacturer': manufacturer,
                        }
                        all_files.append(record)
                        stats['total_files'] += 1
                        stats['total_size'] += size
                        manufacturer_stats['files'] += 1
                        manufacturer_stats['size'] += size
                        system_stats['files'] += 1
                        system_stats['size'] += size
                        if size > HASH_LIMIT:
                            stats['skipped_large'] += 1
                            continue
                        file_hash = get_file_hash(filepath)
                        if not file_hash:
                            # Hashing failed (e.g. file unreadable): record it
                            # rather than silently dropping the file.
                            stats['errors'] += 1
                            continue
                        record['hash'] = file_hash
                        stats['hashed_files'] += 1
                        hash_map.setdefault(file_hash, []).append(str(filepath))
    # Only digests shared by more than one path are duplicates.
    duplicates = {h: paths for h, paths in hash_map.items() if len(paths) > 1}
    print(f'\nScan complete at {datetime.now()}')
    print(f'Total files: {stats["total_files"]:,}')
    print(f'Total size: {stats["total_size"] / (1024**3):.2f} GB')
    print(f'Hashed: {stats["hashed_files"]:,}')
    print(f'Skipped (large): {stats["skipped_large"]:,}')
    print(f'Duplicates found: {len(duplicates)}')
    print(f'Errors: {stats["errors"]:,}')
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output = {
        'scan_date': datetime.now().isoformat(),
        'stats': stats,
        'duplicates': duplicates,
        # Cap the per-file listing so the report stays a manageable size.
        'files': all_files[:10000],
    }
    output_path = OUTPUT_DIR / 'rom-full-scan.json'
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)
    print(f'\nResults saved to {output_path}')
    return stats
# Run the full scan only when this file is executed as a script, not on import.
if __name__ == '__main__':
    scan_roms()