Fresh start - excluded large ROM JSON files

This commit is contained in:
OpenClaw Agent
2026-04-11 09:45:12 -05:00
commit 5deb387aa6
395 changed files with 47744 additions and 0 deletions

102
tools/bulk_memory_loader.py Normal file
View File

@@ -0,0 +1,102 @@
"""
Bulk Memory Loader
==================
Loads all historical memory files into vector database.
"""
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import glob
from datetime import datetime
from memory_vector import setup_memory_vectors, store_memory, MemoryVectorDB
from tools.memory_embedding_worker import process_memory_file
def get_all_memory_files(workspace=None):
    """Collect all memory files to bulk load.

    Args:
        workspace: Root workspace directory to scan. Defaults to
            ``~/.openclaw/workspace`` when omitted (backward compatible
            with the original zero-argument call).

    Returns:
        list[tuple[str, str]]: ``(filepath, source_type)`` pairs where
        ``source_type`` is one of ``"daily"``, ``"memory_md"`` or
        ``"project"``, sorted by full path descending (so daily notes
        come newest-first).
    """
    if workspace is None:
        workspace = os.path.expanduser("~/.openclaw/workspace")
    files = []

    # 1. Daily notes: YYYY-MM-DD.md files under memory/. The glob is
    #    broad ("202*.md") so restrict to 2025/2026 to skip stray files.
    daily_pattern = os.path.join(workspace, "memory", "202*.md")
    for f in glob.glob(daily_pattern):
        if os.path.basename(f).startswith(("2025-", "2026-")):
            files.append((f, "daily"))

    # 2. Top-level MEMORY.md, if present.
    memory_md = os.path.join(workspace, "MEMORY.md")
    if os.path.exists(memory_md):
        files.append((memory_md, "memory_md"))

    # 3. Per-project notes.
    projects_pattern = os.path.join(workspace, "memory", "projects", "*.md")
    for f in glob.glob(projects_pattern):
        files.append((f, "project"))

    # Sort by full path descending: within the memory/ directory this
    # orders the ISO-dated daily notes newest first.
    files.sort(key=lambda x: x[0], reverse=True)
    return files
def bulk_load():
    """Load every known memory file into the vector DB, printing a report.

    Sets up the vector store, embeds each file returned by
    get_all_memory_files(), and prints a progress line per file plus a
    final summary.

    Returns:
        dict: summary with keys "files", "entries", "failed".
    """
    sep = "=" * 60
    print(sep)
    print("BULK MEMORY LOADER")
    print(sep)
    print(f"Started: {datetime.now().strftime('%H:%M:%S')}")
    print()

    # Ensure the vector database exists before inserting anything.
    setup_memory_vectors()
    print("[OK] Database ready\n")

    files = get_all_memory_files()
    print(f"Found {len(files)} files to process:\n")

    # Preview at most ten filenames to keep the header compact.
    for path, kind in files[:10]:
        print(f" - {os.path.basename(path)} ({kind})")
    if len(files) > 10:
        print(f" ... and {len(files) - 10} more\n")

    entry_count = 0
    failures = []
    for idx, (path, kind) in enumerate(files, 1):
        print(f"\n[{idx}/{len(files)}] {os.path.basename(path)}")
        try:
            created = process_memory_file(path, kind)
        except Exception as exc:
            # A single bad file shouldn't abort the whole run; record it.
            print(f" FAILED: {exc}")
            failures.append(path)
        else:
            entry_count += created
            print(f" Created {created} entries")

    # Final summary banner.
    print("\n" + sep)
    print("BULK LOAD COMPLETE")
    print(sep)
    print(f"Files processed: {len(files)}")
    print(f"Total embeddings: {entry_count}")
    if failures:
        print(f"Failed files: {len(failures)}")
        for path in failures:
            print(f" - {path}")
    print(f"Finished: {datetime.now().strftime('%H:%M:%S')}")
    print(sep)

    return {
        "files": len(files),
        "entries": entry_count,
        "failed": len(failures)
    }
# Script entry point: run the full bulk load when executed directly.
if __name__ == "__main__":
    bulk_load()