"""Inventory a ROM collection and report per-system sizes and duplicate files.

The scanner expects the layout ``<root>/Rom Sets (Organized)/<manufacturer>/
<system>/...``. Every file below a system directory is counted; files no
larger than ``HASH_LIMIT`` are MD5-hashed so byte-identical duplicates can be
grouped. Results are printed and written to ``rom-full-scan.json``.
"""
import hashlib
import json
import os
from datetime import datetime
from pathlib import Path

ROM_ROOT = 'R:\\'
OUTPUT_DIR = Path('C:/Users/admin/.openclaw/workspace/rom-inventory')
# Files larger than this many bytes are counted but not hashed.
HASH_LIMIT = 100 * 1024 * 1024
# Upper bound on the per-file records kept in memory and written to the report.
MAX_REPORTED_FILES = 10000
# Read size used while hashing; keeps memory flat regardless of file size.
_HASH_CHUNK_SIZE = 1024 * 1024


def get_file_hash(filepath, limit_bytes=None):
    """Return the MD5 hex digest of a file, or ``None`` if it cannot be read.

    Args:
        filepath: Path of the file to hash.
        limit_bytes: When a positive number, only the first ``limit_bytes``
            bytes are hashed. ``None``, ``0`` or a negative value hashes the
            whole file.

    Returns:
        The hexadecimal digest string, or ``None`` when opening or reading
        the file raises ``OSError``.
    """
    hasher = hashlib.md5()
    remaining = limit_bytes if limit_bytes and limit_bytes > 0 else None
    try:
        with open(filepath, 'rb') as f:
            while remaining is None or remaining > 0:
                if remaining is None:
                    want = _HASH_CHUNK_SIZE
                else:
                    want = min(_HASH_CHUNK_SIZE, remaining)
                chunk = f.read(want)
                if not chunk:
                    break
                hasher.update(chunk)
                if remaining is not None:
                    remaining -= len(chunk)
    except OSError:
        return None
    return hasher.hexdigest()


def _list_subdirs(path, stats):
    """Return the sorted immediate subdirectories of *path*.

    An unreadable directory is counted in ``stats['errors']`` and yields an
    empty list so that one bad folder does not abort the whole scan.
    """
    try:
        return sorted(child for child in path.iterdir() if child.is_dir())
    except OSError:
        stats['errors'] += 1
        return []


def _record_file(filepath, system, manufacturer, stats, hash_map, all_files):
    """Account for one file in *stats*, *hash_map* and *all_files*."""
    try:
        size = filepath.stat().st_size
    except OSError:
        stats['errors'] += 1
        return

    entry = {
        'path': str(filepath),
        'name': filepath.name,
        'size': size,
        'system': system,
        'manufacturer': manufacturer,
    }
    # Only the first MAX_REPORTED_FILES records are ever written out, so
    # there is no point holding more than that in memory.
    if len(all_files) < MAX_REPORTED_FILES:
        all_files.append(entry)

    stats['total_files'] += 1
    stats['total_size'] += size
    for bucket in (stats['by_manufacturer'][manufacturer],
                   stats['by_system'][system]):
        bucket['files'] += 1
        bucket['size'] += size

    if size > HASH_LIMIT:
        stats['skipped_large'] += 1
        return

    file_hash = get_file_hash(filepath)
    if file_hash is None:
        # The file was listed but could not be read; surface that in the
        # error count instead of silently dropping it.
        stats['errors'] += 1
        return

    entry['hash'] = file_hash
    stats['hashed_files'] += 1
    hash_map.setdefault(file_hash, []).append(str(filepath))


def scan_roms(rom_root=None, output_dir=None):
    """Scan the organized ROM tree, print a summary and write a JSON report.

    Args:
        rom_root: Directory containing ``Rom Sets (Organized)``. Defaults to
            the module-level ``ROM_ROOT``.
        output_dir: Directory that receives ``rom-full-scan.json``; created
            if missing. Defaults to the module-level ``OUTPUT_DIR``.

    Returns:
        The statistics dictionary with the keys ``total_files``,
        ``total_size``, ``hashed_files``, ``skipped_large``, ``errors``,
        ``by_system`` and ``by_manufacturer``.
    """
    rom_root = ROM_ROOT if rom_root is None else rom_root
    output_dir = OUTPUT_DIR if output_dir is None else Path(output_dir)

    print(f'Starting full ROM scan at {datetime.now()}')
    print(f'Root: {rom_root}')

    all_files = []
    hash_map = {}
    stats = {
        'total_files': 0,
        'total_size': 0,
        'hashed_files': 0,
        'skipped_large': 0,
        'errors': 0,
        'by_system': {},
        'by_manufacturer': {},
    }

    organized_path = Path(rom_root) / 'Rom Sets (Organized)'
    if organized_path.exists():
        for manufacturer_dir in _list_subdirs(organized_path, stats):
            manufacturer = manufacturer_dir.name
            stats['by_manufacturer'][manufacturer] = {'files': 0, 'size': 0}
            for system_dir in _list_subdirs(manufacturer_dir, stats):
                system = system_dir.name
                # Systems with the same name under different manufacturers
                # share one bucket.
                stats['by_system'].setdefault(system, {'files': 0, 'size': 0})
                for root, _dirs, files in os.walk(system_dir):
                    for name in files:
                        _record_file(Path(root) / name, system, manufacturer,
                                     stats, hash_map, all_files)
    else:
        print(f'Warning: {organized_path} does not exist; nothing scanned')

    duplicates = {h: paths for h, paths in hash_map.items() if len(paths) > 1}
    files_truncated = stats['total_files'] > len(all_files)

    print(f'\nScan complete at {datetime.now()}')
    print(f'Total files: {stats["total_files"]:,}')
    print(f'Total size: {stats["total_size"] / (1024**3):.2f} GB')
    print(f'Hashed: {stats["hashed_files"]:,}')
    print(f'Skipped (large): {stats["skipped_large"]:,}')
    print(f'Duplicates found: {len(duplicates)}')
    if stats['errors']:
        print(f'Errors: {stats["errors"]:,}')
    if files_truncated:
        print(f'Note: file list in report limited to {MAX_REPORTED_FILES:,} entries')

    output_dir.mkdir(parents=True, exist_ok=True)
    output = {
        'scan_date': datetime.now().isoformat(),
        'stats': stats,
        'duplicates': duplicates,
        'files': all_files,
        'files_truncated': files_truncated,
    }
    report_path = output_dir / 'rom-full-scan.json'
    with open(report_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)

    print(f'\nResults saved to {report_path}')
    return stats


if __name__ == '__main__':
    scan_roms()