103 lines
2.8 KiB
Python
103 lines
2.8 KiB
Python
"""
|
|
Bulk Memory Loader
|
|
================
|
|
Loads all historical memory files into vector database.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
import glob
|
|
from datetime import datetime
|
|
from memory_vector import setup_memory_vectors, store_memory, MemoryVectorDB
|
|
from tools.memory_embedding_worker import process_memory_file
|
|
|
|
def get_all_memory_files(workspace=None):
    """Collect every memory file that should be bulk loaded.

    Args:
        workspace: Root workspace directory to scan. When ``None``
            (the default), falls back to ``~/.openclaw/workspace``,
            preserving the original hard-coded behavior.

    Returns:
        A list of ``(filepath, source_type)`` tuples, where
        ``source_type`` is one of ``"daily"``, ``"memory_md"``, or
        ``"project"``. The list is sorted by full path, descending,
        so daily notes (ISO-dated filenames) come newest-first.
    """
    if workspace is None:
        workspace = os.path.expanduser("~/.openclaw/workspace")
    files = []

    # 1. Daily notes: memory/202*.md, restricted to 2025/2026 files.
    daily_pattern = os.path.join(workspace, "memory", "202*.md")
    for f in glob.glob(daily_pattern):
        fname = os.path.basename(f)
        # Only 2025 and 2026 files
        if fname.startswith(('2025-', '2026-')):
            files.append((f, "daily"))

    # 2. Top-level MEMORY.md, if present.
    memory_md = os.path.join(workspace, "MEMORY.md")
    if os.path.exists(memory_md):
        files.append((memory_md, "memory_md"))

    # 3. Per-project notes under memory/projects/.
    projects_pattern = os.path.join(workspace, "memory", "projects", "*.md")
    for f in glob.glob(projects_pattern):
        files.append((f, "project"))

    # Sort by path, descending — within the dailies this yields
    # newest-first because ISO dates sort lexicographically.
    files.sort(key=lambda x: x[0], reverse=True)

    return files
|
|
|
|
|
|
def bulk_load():
    """Load every memory file into the vector DB and print a report.

    Returns a summary dict with the number of files seen, embeddings
    created, and files that failed to process. Failures are logged and
    collected rather than aborting the whole run.
    """
    banner = "=" * 60
    print(banner)
    print("BULK MEMORY LOADER")
    print(banner)
    print(f"Started: {datetime.now().strftime('%H:%M:%S')}")
    print()

    # Make sure the vector database exists before ingesting anything.
    setup_memory_vectors()
    print("[OK] Database ready\n")

    # Discover the full set of files to embed.
    memory_files = get_all_memory_files()
    print(f"Found {len(memory_files)} files to process:\n")

    # Preview at most the first ten discovered files.
    for path, kind in memory_files[:10]:
        print(f" - {os.path.basename(path)} ({kind})")
    if len(memory_files) > 10:
        print(f" ... and {len(memory_files) - 10} more\n")

    entry_count = 0
    failures = []

    # Process each file, recording failures instead of stopping.
    for idx, (path, kind) in enumerate(memory_files, 1):
        print(f"\n[{idx}/{len(memory_files)}] {os.path.basename(path)}")
        try:
            created = process_memory_file(path, kind)
        except Exception as exc:
            print(f" FAILED: {exc}")
            failures.append(path)
        else:
            entry_count += created
            print(f" Created {created} entries")

    # Final summary banner.
    print("\n" + banner)
    print("BULK LOAD COMPLETE")
    print(banner)
    print(f"Files processed: {len(memory_files)}")
    print(f"Total embeddings: {entry_count}")
    if failures:
        print(f"Failed files: {len(failures)}")
        for path in failures:
            print(f" - {path}")
    print(f"Finished: {datetime.now().strftime('%H:%M:%S')}")
    print(banner)

    return {
        "files": len(memory_files),
        "entries": entry_count,
        "failed": len(failures)
    }
|
|
|
|
|
|
if __name__ == "__main__":
|
|
bulk_load()
|