Fresh start - excluded large ROM JSON files
This commit is contained in:
134
memory_vector.py
Normal file
134
memory_vector.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""
|
||||
Memory Vector Search Module
|
||||
Uses sqlite-vector extension for local embeddings.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import struct
|
||||
import json
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
# SQLite database file opened by MemoryVectorDB.
db_path = r"C:\Users\admin\.openclaw\memory.db"

# Extension library passed to sqlite3's load_extension() on every connection.
dll_path = r"C:\Users\admin\AppData\Local\Programs\Python\Python313\Lib\site-packages\sqlite_vector\binaries\vector.dll"

# Embedding length handed to vector_init() as "dimension=<DIMENSION>".
DIMENSION = 768
|
||||
|
||||
|
||||
class MemoryVectorDB:
    """SQLite-backed store for text snippets and their embedding vectors.

    Each instance owns one connection with the vector extension loaded.
    Use it as a context manager so the connection is always closed::

        with MemoryVectorDB() as db:
            db.setup()
            db.insert("note", "a.txt", "hello", embedding)
            db.quantize()
            rows = db.search(embedding, k=3)
    """

    def __init__(self, *, path: Optional[str] = None,
                 extension_path: Optional[str] = None,
                 dimension: Optional[int] = None):
        """Open the database, load the extension and register the column.

        Args:
            path: Database file to open. Defaults to the module-level
                ``db_path``.
            extension_path: Extension library to load. Defaults to the
                module-level ``dll_path``.
            dimension: Required embedding length. Defaults to the
                module-level ``DIMENSION``.

        Any exception raised while loading the extension or initialising
        the vector column is re-raised after the connection is closed, so
        a failed constructor does not leak an open database handle.
        """
        self.dimension = DIMENSION if dimension is None else dimension
        self._vector_ready = False
        self.conn = sqlite3.connect(db_path if path is None else path)
        try:
            self.conn.enable_load_extension(True)
            try:
                self.conn.load_extension(
                    dll_path if extension_path is None else extension_path
                )
            finally:
                # Only this one library is needed; do not leave the
                # connection able to load arbitrary extensions afterwards.
                self.conn.enable_load_extension(False)
            self.cursor = self.conn.cursor()
            # Always init vector on connection
            self._ensure_vector_init()
        except Exception:
            self.conn.close()
            raise

    def _table_exists(self) -> bool:
        """Return True when the ``memory_embeddings`` table is present."""
        self.cursor.execute(
            "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
            ('memory_embeddings',),
        )
        return self.cursor.fetchone() is not None

    def _ensure_vector_init(self):
        """Register the embedding column with the extension, if possible.

        Does nothing while the table is missing; ``setup()`` calls this
        again once the table has been created.

        Raises:
            sqlite3.OperationalError: For any failure other than the
                column already being initialised.
        """
        # NOTE(review): the extension presumably rejects vector_init() on a
        # table that does not exist yet -- confirm. Deferring the call until
        # the table exists is harmless either way.
        if not self._table_exists():
            return
        try:
            self.cursor.execute(
                'SELECT vector_init(?, ?, ?)',
                ('memory_embeddings', 'embedding',
                 f'type=FLOAT32,dimension={self.dimension}'),
            )
        except sqlite3.OperationalError as e:
            # Already initialized is OK
            if 'already initialized' not in str(e).lower():
                raise
        self._vector_ready = True

    def _check_dimension(self, embedding) -> None:
        """Raise ValueError unless *embedding* has ``self.dimension`` items."""
        if len(embedding) != self.dimension:
            raise ValueError(
                f"expected an embedding of length {self.dimension}, "
                f"got {len(embedding)}"
            )

    @staticmethod
    def _pack_embedding(embedding) -> bytes:
        """Pack floats into consecutive native 4-byte floats (format 'f')."""
        return struct.pack(f'{len(embedding)}f', *embedding)

    def close(self):
        """Close the underlying database connection."""
        self.conn.close()

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, *args):
        """Close the connection on leaving the ``with`` block."""
        self.close()

    def setup(self):
        """Create table. Safe to run multiple times.

        Also finishes vector initialisation when the constructor had to
        skip it because the table did not exist yet.

        Returns:
            True once the table exists and the change is committed.
        """
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS memory_embeddings (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source_type TEXT,
                source_path TEXT,
                content_text TEXT,
                embedding BLOB,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        self.conn.commit()
        if not self._vector_ready:
            self._ensure_vector_init()
        return True

    def insert(self, source_type: str, source_path: str, content_text: str,
               embedding: List[float]):
        """Store a memory with its embedding.

        Args:
            source_type: Category label stored in ``source_type``.
            source_path: Origin of the text, stored in ``source_path``.
            content_text: The text itself.
            embedding: Vector of exactly ``self.dimension`` floats. It is
                sent as JSON and converted by the SQL function
                ``vector_as_f32``.

        Returns:
            The row id of the inserted row.

        Raises:
            ValueError: If *embedding* has the wrong length.
        """
        # Reject bad vectors here instead of storing a row that can never
        # match the configured column dimension.
        self._check_dimension(embedding)
        vec_json = json.dumps(embedding)
        self.cursor.execute('''
            INSERT INTO memory_embeddings (source_type, source_path, content_text, embedding)
            VALUES (?, ?, ?, vector_as_f32(?))
        ''', (source_type, source_path, content_text, vec_json))
        self.conn.commit()
        return self.cursor.lastrowid

    def quantize(self):
        """Quantize for fast search. Call after batch inserts.

        Runs ``vector_quantize`` and then ``vector_quantize_preload`` on the
        embedding column, committing after each.
        """
        self.cursor.execute("SELECT vector_quantize('memory_embeddings', 'embedding')")
        self.conn.commit()
        self.cursor.execute("SELECT vector_quantize_preload('memory_embeddings', 'embedding')")
        self.conn.commit()

    def search(self, query_embedding: List[float], k: int = 5,
               source_type: Optional[str] = None) -> List[Tuple]:
        """Search similar memories. Must quantize() first!

        Args:
            query_embedding: Vector of exactly ``self.dimension`` floats.
            k: Number of neighbours requested from the scan. Values below 1
                return an empty list without querying the database.
            source_type: When truthy, keep only rows whose ``source_type``
                matches. The filter is applied to the rows the scan
                returned, so fewer than *k* rows may come back.

        Returns:
            A list of ``(source_path, content_text, distance)`` rows.

        Raises:
            ValueError: If *query_embedding* has the wrong length.
        """
        values = list(query_embedding)
        self._check_dimension(values)
        if k < 1:
            return []

        query_blob = self._pack_embedding(values)
        sql = '''
            SELECT e.source_path, e.content_text, v.distance
            FROM memory_embeddings AS e
            JOIN vector_quantize_scan('memory_embeddings', 'embedding', ?, ?) AS v
            ON e.id = v.rowid
        '''
        params = [query_blob, k]
        if source_type:
            sql += ' WHERE e.source_type = ?'
            params.append(source_type)

        self.cursor.execute(sql, params)
        return self.cursor.fetchall()
|
||||
|
||||
|
||||
def setup_memory_vectors():
    """Create the embeddings table using a short-lived connection.

    Returns whatever ``MemoryVectorDB.setup()`` returns.
    """
    with MemoryVectorDB() as store:
        outcome = store.setup()
    return outcome
|
||||
|
||||
|
||||
def store_memory(source_type: str, source_path: str, content: str,
                 embedding: List[float], *, quantize: bool = True):
    """Store a memory.

    Opens a short-lived ``MemoryVectorDB`` connection, inserts one row and,
    by default, re-quantizes the table.

    Args:
        source_type: Category label for the memory.
        source_path: Where the text came from.
        content: The text to store.
        embedding: Embedding vector for *content*.
        quantize: Re-quantize after the insert (the default, matching the
            previous behaviour). Pass ``False`` when storing many memories
            in a row and call ``MemoryVectorDB.quantize()`` once at the
            end; new rows are not quantized until then.

    Returns:
        The row id of the inserted memory.
    """
    with MemoryVectorDB() as db:
        rowid = db.insert(source_type, source_path, content, embedding)
        if quantize:
            db.quantize()
        return rowid
|
||||
|
||||
|
||||
def search_memories(query_embedding: List[float], k: int = 5,
                    source_type: Optional[str] = None):
    """Search memories.

    Opens a short-lived ``MemoryVectorDB`` connection and delegates to its
    ``search`` method.

    Args:
        query_embedding: Embedding vector to compare against stored rows.
        k: Number of neighbours to request.
        source_type: Optional filter forwarded to ``MemoryVectorDB.search``;
            ``None`` (the default) searches every source type.

    Returns:
        The rows returned by ``MemoryVectorDB.search``: tuples of
        ``(source_path, content_text, distance)``.
    """
    with MemoryVectorDB() as db:
        return db.search(query_embedding, k, source_type)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: create the table, store one all-zero embedding, then
    # look it up again and print whatever comes back.
    setup_memory_vectors()
    print("[OK] Memory vector DB ready")

    probe = [0.0] * DIMENSION
    store_memory("test", "test.txt", "Hello world", probe)
    print("[OK] Test memory stored")

    hits = search_memories(probe, k=1)
    print(f"[OK] Found {len(hits)} result(s)")
    for hit in hits:
        print(f" - {hit}")
|
||||
Reference in New Issue
Block a user