112 lines
4.4 KiB
Python
112 lines
4.4 KiB
Python
import os
|
|
import hashlib
|
|
import json
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
# Root of the ROM drive to scan (Windows drive letter).
ROM_ROOT = 'R:\\'
# Directory where the JSON scan report is written.
OUTPUT_DIR = Path('C:/Users/admin/.openclaw/workspace/rom-inventory')
# Files larger than this (100 MiB) are counted but not hashed.
HASH_LIMIT = 100 * 1024 * 1024
|
|
|
|
def get_file_hash(filepath, limit_bytes=None):
    """Return the MD5 hex digest of *filepath*, or None if it can't be read.

    Args:
        filepath: path (str or Path) of the file to hash.
        limit_bytes: if truthy, hash at most the first ``limit_bytes`` bytes
            (a partial fingerprint); falsy (None/0) hashes the whole file,
            matching the original ``if limit_bytes:`` truthiness check.

    Returns:
        Hex digest string, or None when the file cannot be opened/read
        (permissions, vanished mid-scan, bad media) so the caller can skip it.
    """
    hasher = hashlib.md5()
    # None means "no limit"; preserve the original's truthiness semantics.
    remaining = limit_bytes if limit_bytes else None
    try:
        with open(filepath, 'rb') as f:
            while True:
                # Chunked reads keep memory flat even with a large limit
                # (the original allocated limit_bytes in a single read).
                step = 8192 if remaining is None else min(8192, remaining)
                if step == 0:
                    break
                chunk = f.read(step)
                if not chunk:
                    break
                hasher.update(chunk)
                if remaining is not None:
                    remaining -= len(chunk)
        return hasher.hexdigest()
    except OSError:
        # Only I/O failures are expected here; anything else is a bug and
        # should propagate rather than be silently turned into None.
        return None
|
|
|
|
def _scan_system_dir(system_dir, manufacturer, system, all_files, hash_map, stats):
    """Walk one system directory, recording every file into the shared
    accumulators (``all_files``, ``hash_map``, ``stats``). Mutates them."""
    for root, _dirs, files in os.walk(system_dir):
        for name in files:
            filepath = Path(root) / name
            # Keep the try minimal: only stat() is expected to raise here,
            # and only for I/O reasons (the original wrapped ~30 statements
            # in a blanket `except Exception`, hiding real bugs).
            try:
                size = filepath.stat().st_size
            except OSError:
                stats['errors'] += 1
                continue

            record = {
                'path': str(filepath),
                'name': name,
                'size': size,
                'system': system,
                'manufacturer': manufacturer,
            }
            all_files.append(record)

            stats['total_files'] += 1
            stats['total_size'] += size
            stats['by_manufacturer'][manufacturer]['files'] += 1
            stats['by_manufacturer'][manufacturer]['size'] += size
            stats['by_system'][system]['files'] += 1
            stats['by_system'][system]['size'] += size

            if size > HASH_LIMIT:
                # Too big to hash economically; counted separately.
                stats['skipped_large'] += 1
                continue

            file_hash = get_file_hash(filepath)
            if file_hash:
                record['hash'] = file_hash
                stats['hashed_files'] += 1
                hash_map.setdefault(file_hash, []).append(str(filepath))


def scan_roms():
    """Walk the organized ROM collection, hash files, and emit a JSON report.

    Expects the layout
    ``<ROM_ROOT>/Rom Sets (Organized)/<manufacturer>/<system>/...``.
    Records path/name/size/system/manufacturer for every file, MD5-hashes
    files up to HASH_LIMIT bytes to detect duplicates, prints a summary, and
    writes the full report to ``OUTPUT_DIR / 'rom-full-scan.json'``.

    Returns:
        dict: the aggregate ``stats`` mapping (counts, total size, errors,
        per-system and per-manufacturer breakdowns).
    """
    print(f'Starting full ROM scan at {datetime.now()}')
    print(f'Root: {ROM_ROOT}')

    all_files = []   # one metadata dict per file (listing capped in output)
    hash_map = {}    # md5 digest -> list of paths, for duplicate detection
    stats = {
        'total_files': 0,
        'total_size': 0,
        'hashed_files': 0,
        'skipped_large': 0,
        'errors': 0,
        'by_system': {},
        'by_manufacturer': {},
    }

    organized_path = Path(ROM_ROOT) / 'Rom Sets (Organized)'
    if organized_path.exists():
        for manufacturer_dir in organized_path.iterdir():
            if not manufacturer_dir.is_dir():
                continue
            manufacturer = manufacturer_dir.name
            stats['by_manufacturer'][manufacturer] = {'files': 0, 'size': 0}
            for system_dir in manufacturer_dir.iterdir():
                if not system_dir.is_dir():
                    continue
                system = system_dir.name
                # The same system name can appear under several manufacturers,
                # so only initialize its bucket on first sight.
                stats['by_system'].setdefault(system, {'files': 0, 'size': 0})
                _scan_system_dir(system_dir, manufacturer, system,
                                 all_files, hash_map, stats)

    duplicates = {h: paths for h, paths in hash_map.items() if len(paths) > 1}

    print(f'\nScan complete at {datetime.now()}')
    print(f'Total files: {stats["total_files"]:,}')
    print(f'Total size: {stats["total_size"] / (1024**3):.2f} GB')
    print(f'Hashed: {stats["hashed_files"]:,}')
    print(f'Skipped (large): {stats["skipped_large"]:,}')
    print(f'Duplicates found: {len(duplicates)}')

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    output = {
        'scan_date': datetime.now().isoformat(),
        'stats': stats,
        'duplicates': duplicates,
        # Cap the per-file listing so the report stays a manageable size.
        'files': all_files[:10000],
    }

    report_path = OUTPUT_DIR / 'rom-full-scan.json'
    # Explicit UTF-8 + ensure_ascii=False keeps non-ASCII ROM titles readable
    # in the report instead of locale-dependent encoding with \uXXXX escapes.
    with open(report_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2, ensure_ascii=False)

    print(f'\nResults saved to {report_path}')
    return stats
|
|
|
|
if __name__ == '__main__':
    # Run the full scan when executed as a script (not on import).
    scan_roms()
|