Fresh start - excluded large ROM JSON files
This commit is contained in:
102
tools/bulk_memory_loader.py
Normal file
102
tools/bulk_memory_loader.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Bulk Memory Loader
|
||||
================
|
||||
Loads all historical memory files into vector database.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import glob
|
||||
from datetime import datetime
|
||||
from memory_vector import setup_memory_vectors, store_memory, MemoryVectorDB
|
||||
from tools.memory_embedding_worker import process_memory_file
|
||||
|
||||
def get_all_memory_files():
    """Collect every memory file that should be bulk loaded.

    Scans the OpenClaw workspace for three kinds of sources: daily
    notes (2025/2026 prefixes only), the top-level MEMORY.md, and
    per-project markdown notes.

    Returns:
        list[tuple[str, str]]: (path, source_type) pairs sorted by
        path in descending order, so the newest daily notes come
        first.
    """
    root = os.path.expanduser("~/.openclaw/workspace")
    collected = []

    # 1. Daily notes — the glob is wide (202*.md), so filter the hits
    #    down to the 2025-/2026- filename prefixes we actually want.
    for path in glob.glob(os.path.join(root, "memory", "202*.md")):
        if os.path.basename(path).startswith(("2025-", "2026-")):
            collected.append((path, "daily"))

    # 2. The long-term MEMORY.md file, when it exists.
    memory_md = os.path.join(root, "MEMORY.md")
    if os.path.exists(memory_md):
        collected.append((memory_md, "memory_md"))

    # 3. Every per-project note.
    collected.extend(
        (path, "project")
        for path in glob.glob(os.path.join(root, "memory", "projects", "*.md"))
    )

    # Descending path sort puts the most recent dailies at the front.
    return sorted(collected, key=lambda item: item[0], reverse=True)
|
||||
|
||||
|
||||
def bulk_load():
    """Embed every historical memory file into the vector database.

    Prints progress to stdout as it goes. A failure on any single
    file is reported and recorded rather than aborting the run.

    Returns:
        dict: summary with "files" (count discovered), "entries"
        (total embeddings created) and "failed" (count of files
        that raised).
    """
    banner = "=" * 60
    print(banner)
    print("BULK MEMORY LOADER")
    print(banner)
    print(f"Started: {datetime.now().strftime('%H:%M:%S')}")
    print()

    # Make sure the vector store exists before writing anything to it.
    setup_memory_vectors()
    print("[OK] Database ready\n")

    files = get_all_memory_files()
    print(f"Found {len(files)} files to process:\n")

    # Preview the first ten so the operator can sanity-check the list.
    for path, kind in files[:10]:
        print(f"  - {os.path.basename(path)} ({kind})")
    if len(files) > 10:
        print(f"  ... and {len(files) - 10} more\n")

    total_entries = 0
    failed_files = []

    # Best-effort loop: one bad file must not stop the whole load.
    for index, (filepath, source_type) in enumerate(files, 1):
        print(f"\n[{index}/{len(files)}] {os.path.basename(filepath)}")
        try:
            created = process_memory_file(filepath, source_type)
        except Exception as exc:
            print(f"  FAILED: {exc}")
            failed_files.append(filepath)
        else:
            total_entries += created
            print(f"  Created {created} entries")

    # Final summary.
    print("\n" + banner)
    print("BULK LOAD COMPLETE")
    print(banner)
    print(f"Files processed: {len(files)}")
    print(f"Total embeddings: {total_entries}")
    if failed_files:
        print(f"Failed files: {len(failed_files)}")
        for failed in failed_files:
            print(f"  - {failed}")
    print(f"Finished: {datetime.now().strftime('%H:%M:%S')}")
    print(banner)

    return {
        "files": len(files),
        "entries": total_entries,
        "failed": len(failed_files),
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
bulk_load()
|
||||
Reference in New Issue
Block a user