Fresh start - excluded large ROM JSON files
This commit is contained in:
156
tools/rom-inventory.py
Normal file
156
tools/rom-inventory.py
Normal file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ROM Library Inventory Scanner"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
# Root of the ROM library drive. A raw string literal cannot end in a single
# backslash (r"R:\" does not parse), so the separator is escaped instead.
ROOT = "R:\\"
# Directory that receives the generated inventory JSON reports.
OUTPUT_DIR = r"C:\Users\admin\.openclaw\workspace\rom-inventory"
|
||||
|
||||
def ensure_output_dir():
    """Create OUTPUT_DIR, including missing parents; no-op if it already exists."""
    Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def get_file_hash(filepath, block_size=65536):
    """Return the hexadecimal MD5 digest of a file, or None if it is unreadable.

    Args:
        filepath: Path of the file to hash.
        block_size: Number of bytes read per chunk, so the whole file is
            never held in memory at once.

    Returns:
        The digest as a hex string, or None when the file cannot be opened
        or read (missing, locked, permission denied, malformed path).
    """
    try:
        hasher = hashlib.md5()
        with open(filepath, "rb") as f:
            # iter() with a sentinel stops at the first empty read (EOF).
            for block in iter(lambda: f.read(block_size), b""):
                hasher.update(block)
        return hasher.hexdigest()
    except (OSError, ValueError):
        # Best-effort: a file that cannot be hashed is reported without a
        # hash. KeyboardInterrupt/SystemExit are deliberately not caught.
        return None
|
||||
|
||||
def get_file_size(filepath):
    """Return the size of *filepath* in bytes, or 0 if it cannot be determined.

    A missing or inaccessible file yields 0 rather than raising, so a single
    bad entry does not abort a scan.
    """
    try:
        return os.path.getsize(filepath)
    except (OSError, ValueError):
        # Narrowed from a bare except so Ctrl-C and SystemExit still propagate.
        return 0
|
||||
|
||||
# Lower-case file suffixes (leading dot included) that the scanner counts as
# ROM files. The last row lists archive formats.
ROM_EXTENSIONS = {
    ".sfc", ".nes", ".z64", ".v64", ".n64", ".gba", ".gbc", ".gb",
    ".nds", ".3ds", ".iso", ".bin", ".cue", ".cso", ".pbp",
    ".rom", ".md", ".sms", ".gg", ".sg", ".col", ".vec",
    ".a26", ".a52", ".a78", ".lnx", ".j64", ".jag",
    ".pce", ".ngp", ".ngc", ".ws", ".wsc", ".vb",
    ".zip", ".7z", ".rar",
}
|
||||
|
||||
def is_rom_file(filename):
    """Return True when the final suffix of *filename* is in ROM_EXTENSIONS.

    The comparison is case-insensitive: the suffix is lower-cased first.
    """
    extension = Path(filename).suffix
    return extension.lower() in ROM_EXTENSIONS
|
||||
|
||||
def scan_console(console_path, console_name, *, max_listed=100,
                 hash_size_limit=100 * 1024 * 1024):
    """Walk one console directory and summarise the ROM files beneath it.

    Args:
        console_path: Directory to scan recursively (str or Path).
        console_name: Label stored under "console" in the result.
        max_listed: Maximum number of per-file entries kept in "roms".
        hash_size_limit: Files of this many bytes or more are listed
            without a hash.

    Returns:
        ``{"console", "error"}`` when *console_path* does not exist.
        Otherwise a dict with "console", "file_count", "total_size_bytes",
        "total_size_gb" and "roms". The counts and sizes cover every ROM
        file found; "roms" holds only the first *max_listed* entries, each
        with "name", "path", "size" and "hash".
    """
    console_path = Path(console_path)
    if not console_path.exists():
        return {"console": console_name, "error": "Path not found"}

    roms = []
    total_size = 0
    file_count = 0

    for root, _dirs, files in os.walk(console_path):
        for file in files:
            if not is_rom_file(file):
                continue

            filepath = Path(root) / file
            size = get_file_size(filepath)
            total_size += size
            file_count += 1

            # Only the first max_listed entries are returned, so hashing any
            # file beyond them would read it from disk for nothing.
            if len(roms) >= max_listed:
                continue

            file_hash = None
            if size < hash_size_limit:
                file_hash = get_file_hash(filepath)

            roms.append({
                "name": file,
                "path": str(filepath),
                "size": size,
                "hash": file_hash,
            })

    return {
        "console": console_name,
        "file_count": file_count,
        "total_size_bytes": total_size,
        "total_size_gb": round(total_size / (1024**3), 2),
        "roms": roms,
    }
|
||||
|
||||
def scan_all():
    """Scan the organized ROM library, write a JSON inventory and return it.

    Walks ``ROOT / "Rom Sets (Organized)"`` as manufacturer/console folders,
    calls scan_console() for each console folder, groups listed ROMs by hash
    to find duplicates, and writes the result to a timestamped JSON file in
    OUTPUT_DIR. Progress and a final summary are printed to stdout.

    Duplicate detection only sees the entries present in each console's
    "roms" list, so ROMs that scan_console() leaves out of that list (or
    lists without a hash) are not compared.

    Returns:
        The results dict with "scan_date", "root", "organized", "duplicates"
        and "summary" (plus "error" when the library folder is missing).
    """
    ensure_output_dir()

    organized_path = Path(ROOT) / "Rom Sets (Organized)"

    results = {
        "scan_date": datetime.now().isoformat(),
        "root": ROOT,
        "organized": {},
        "duplicates": [],
        "summary": {},
    }

    print("Scanning Rom Sets (Organized)...")
    if organized_path.is_dir():
        for manufacturer in organized_path.iterdir():
            if not manufacturer.is_dir():
                continue
            print(f" {manufacturer.name}...")
            consoles = results["organized"][manufacturer.name] = {}
            for console in manufacturer.iterdir():
                if not console.is_dir():
                    continue
                console_data = scan_console(console, console.name)
                consoles[console.name] = console_data
                # Pulled out of the f-string: reusing the same quote type
                # inside an f-string only parses on Python 3.12+.
                file_count = console_data.get("file_count", 0)
                print(f" {console.name}: {file_count} files")
    else:
        # Mirror scan_console(): report a missing library instead of
        # crashing in iterdir() (e.g. when the drive is not mounted).
        results["error"] = "Path not found"
        print(f" Path not found: {organized_path}")

    print("\nFinding duplicates...")
    all_hashes = defaultdict(list)
    for consoles in results["organized"].values():
        for console, data in consoles.items():
            # Consoles that returned an error dict have no "roms" key.
            for rom in data.get("roms", ()):
                if rom.get("hash"):
                    all_hashes[rom["hash"]].append({
                        "name": rom["name"],
                        "console": console,
                        "path": rom["path"],
                    })

    results["duplicates"] = [
        {"hash": h, "files": files}
        for h, files in all_hashes.items()
        if len(files) > 1
    ]

    total_files = sum(
        data.get("file_count", 0)
        for consoles in results["organized"].values()
        for data in consoles.values()
    )
    total_size = sum(
        data.get("total_size_bytes", 0)
        for consoles in results["organized"].values()
        for data in consoles.values()
    )

    summary = {
        "total_roms": total_files,
        "total_size_gb": round(total_size / (1024**3), 2),
        "duplicate_count": len(results["duplicates"]),
    }
    results["summary"] = summary

    timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
    output_file = Path(OUTPUT_DIR) / f"rom-inventory-{timestamp}.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    print("\n=== INVENTORY COMPLETE ===")
    print(f"Total ROMs: {total_files:,}")
    print(f"Total Size: {summary['total_size_gb']:.2f} GB")
    print(f"Duplicates Found: {len(results['duplicates'])}")
    print(f"Output: {output_file}")

    return results
|
||||
|
||||
# Run a full library scan only when executed as a script, not on import.
if __name__ == "__main__":
    scan_all()
|
||||
Reference in New Issue
Block a user