"""
Bulk Memory Loader
==================

Loads all historical memory files into the vector database.
"""
import os
import sys

# Make the project root importable when this file is run as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import glob
from datetime import datetime

from memory_vector import setup_memory_vectors, store_memory, MemoryVectorDB
from tools.memory_embedding_worker import process_memory_file


def get_all_memory_files():
    """Collect all memory files to bulk load.

    Returns:
        list[tuple[str, str]]: ``(path, source_type)`` pairs, where
        ``source_type`` is one of ``"daily"``, ``"memory_md"`` or
        ``"project"``. Sorted by path descending, which puts the
        date-named daily notes newest-first.
    """
    workspace = os.path.expanduser("~/.openclaw/workspace")
    files = []

    # 1. Daily notes — glob 202*.md, then keep only 2025-/2026- files.
    daily_pattern = os.path.join(workspace, "memory", "202*.md")
    for f in glob.glob(daily_pattern):
        fname = os.path.basename(f)
        if fname.startswith(('2025-', '2026-')):
            files.append((f, "daily"))

    # 2. Long-term MEMORY.md, if present.
    memory_md = os.path.join(workspace, "MEMORY.md")
    if os.path.exists(memory_md):
        files.append((memory_md, "memory_md"))

    # 3. Per-project note files.
    projects_pattern = os.path.join(workspace, "memory", "projects", "*.md")
    for f in glob.glob(projects_pattern):
        files.append((f, "project"))

    # Sort by path (newest first for the dailies, whose names are dates).
    files.sort(key=lambda x: x[0], reverse=True)
    return files


def bulk_load():
    """Run the bulk load: embed every memory file into the vector DB.

    Processing is best-effort: a file that raises is logged and skipped
    so one bad file does not abort the whole run.

    Returns:
        dict: ``{"files": <files found>, "entries": <embeddings created>,
        "failed": <files that raised>}``.
    """
    print("=" * 60)
    print("BULK MEMORY LOADER")
    print("=" * 60)
    print(f"Started: {datetime.now().strftime('%H:%M:%S')}")
    print()

    # Ensure the vector database exists before writing to it.
    setup_memory_vectors()
    print("[OK] Database ready\n")

    files = get_all_memory_files()
    print(f"Found {len(files)} files to process:\n")

    # Preview only the first 10 files.
    for f, t in files[:10]:
        print(f"  - {os.path.basename(f)} ({t})")
    if len(files) > 10:
        print(f"  ... and {len(files) - 10} more\n")

    # Process each file, collecting failures instead of raising.
    total_entries = 0
    failed_files = []
    for i, (filepath, source_type) in enumerate(files, 1):
        print(f"\n[{i}/{len(files)}] {os.path.basename(filepath)}")
        try:
            entries = process_memory_file(filepath, source_type)
            total_entries += entries
            print(f"  Created {entries} entries")
        except Exception as e:
            # Deliberate broad catch: log and continue with the next file.
            print(f"  FAILED: {e}")
            failed_files.append(filepath)

    # Summary.
    print("\n" + "=" * 60)
    print("BULK LOAD COMPLETE")
    print("=" * 60)
    print(f"Files processed: {len(files)}")
    print(f"Total embeddings: {total_entries}")
    if failed_files:
        print(f"Failed files: {len(failed_files)}")
        for f in failed_files:
            print(f"  - {f}")
    print(f"Finished: {datetime.now().strftime('%H:%M:%S')}")
    print("=" * 60)

    return {
        "files": len(files),
        "entries": total_entries,
        "failed": len(failed_files)
    }


if __name__ == "__main__":
    bulk_load()