Fresh start - excluded large ROM JSON files
This commit is contained in:
263
tools/session_monitor.py
Normal file
263
tools/session_monitor.py
Normal file
@@ -0,0 +1,263 @@
|
||||
"""
|
||||
Session Monitor
|
||||
===============
|
||||
Automatically tracks conversation transcripts and captures snapshots.
|
||||
Runs via cron every 2 minutes.
|
||||
|
||||
Reads OpenClaw session transcripts from:
|
||||
~/.openclaw/agents/main/sessions/*.jsonl
|
||||
|
||||
Usage:
|
||||
python session_monitor.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from memory_vector import store_memory
|
||||
|
||||
# SQLite database that stores memory snapshots and per-session tracking rows.
DB_PATH = os.path.expanduser("~/.openclaw/memory.db")
# Directory where OpenClaw writes per-session JSONL transcripts (see module docstring).
SESSIONS_DIR = os.path.expanduser("~/.openclaw/agents/main/sessions")
SNAPSHOT_THRESHOLD = 15  # Messages between snapshots
|
||||
|
||||
def get_db():
    """Open a connection to the memory database with dict-like row access."""
    connection = sqlite3.connect(DB_PATH)
    # sqlite3.Row lets callers index results by column name.
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
def find_session_files():
    """Locate all session transcript files, sorted most-recent-first by mtime."""
    if not os.path.exists(SESSIONS_DIR):
        return []

    records = [
        {
            'session_id': transcript.stem,  # filename without extension
            'path': str(transcript),
            'mtime': transcript.stat().st_mtime,
        }
        for transcript in Path(SESSIONS_DIR).glob("*.jsonl")
    ]
    # Newest transcripts first so active sessions are processed before stale ones.
    return sorted(records, key=lambda rec: rec['mtime'], reverse=True)
|
||||
|
||||
def parse_transcript(filepath, start_index=0):
    """
    Parse a JSONL transcript file and extract user messages.

    Args:
        filepath: Path to the .jsonl transcript.
        start_index: Line offset to resume from; lines before it are skipped
            so already-processed messages are not re-counted.

    Returns:
        (messages, total_lines): ``messages`` is a list of dicts with keys
        ``index`` (line number in the file), ``timestamp`` and ``text``;
        ``total_lines`` is the current number of lines in the file.
    """
    messages = []

    if not os.path.exists(filepath):
        return messages, 0

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        # Best-effort: an unreadable transcript is reported, not fatal.
        print(f"[ERROR] Cannot read {filepath}: {e}")
        return messages, 0

    total_lines = len(lines)

    # Process lines from start_index onwards
    for i, line in enumerate(lines[start_index:], start=start_index):
        if not line.strip():
            continue

        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            # Skip partial/corrupt lines (e.g. a write in progress).
            continue

        # Only count user messages
        if entry.get('type') != 'message':
            continue
        msg = entry.get('message', {})
        if msg.get('role') != 'user':
            continue

        content = msg.get('content', [])
        # BUG FIX: content may be a plain string rather than a list of parts.
        # The old code iterated the string character-by-character and joined
        # with spaces ("hi" -> "h i"); treat a string as a single text part.
        if isinstance(content, str):
            text_parts = [content] if content else []
        else:
            text_parts = []
            for part in content:
                if isinstance(part, dict) and part.get('type') == 'text':
                    text_parts.append(part.get('text', ''))
                elif isinstance(part, str):
                    text_parts.append(part)

        if text_parts:
            messages.append({
                'index': i,
                'timestamp': entry.get('timestamp'),
                'text': ' '.join(text_parts)
            })

    return messages, total_lines
|
||||
|
||||
def generate_summary(messages):
    """Join message texts into a single summary string for embedding.

    Returns None when there are no messages; otherwise at most 1000 chars.
    """
    if not messages:
        return None

    joined = ' | '.join(m['text'] for m in messages)
    # Cap the summary so the embedding prompt stays small.
    return joined[:1000]
|
||||
|
||||
def process_session(session_info):
    """
    Process a single session.

    Parses transcript lines added since the last run (tracked in the
    session_tracking table) and, once SNAPSHOT_THRESHOLD new user messages
    have accumulated, embeds a summary via the local Ollama server and stores
    it through store_memory().

    Args:
        session_info: dict with 'session_id' and 'path' keys
            (as produced by find_session_files()).

    Returns (snapshot_created, message_count).
    """
    conn = get_db()
    cursor = conn.cursor()

    session_id = session_info['session_id']
    filepath = session_info['path']

    # Get or create tracking record
    cursor.execute('''
        SELECT * FROM session_tracking WHERE session_id = ?
    ''', (session_id,))

    tracking = cursor.fetchone()

    if tracking is None:
        # New session: start counting from the beginning of the transcript.
        cursor.execute('''
            INSERT INTO session_tracking
            (session_id, transcript_path, last_message_index, messages_since_snapshot)
            VALUES (?, ?, 0, 0)
        ''', (session_id, filepath))
        conn.commit()
        last_index = 0
        since_snapshot = 0
    else:
        last_index = tracking['last_message_index']
        since_snapshot = tracking['messages_since_snapshot']

    # Parse new messages
    # NOTE: despite the column name, last_message_index stores a LINE count
    # (total_lines below), not a message count — parse_transcript resumes by line.
    messages, total_lines = parse_transcript(filepath, last_index)

    if not messages:
        conn.close()
        return False, 0

    new_count = len(messages)
    since_snapshot += new_count

    print(f"  [{session_id[:8]}...]: {new_count} new messages, {since_snapshot} since snapshot")

    snapshot_created = False

    # Check if threshold reached
    if since_snapshot >= SNAPSHOT_THRESHOLD:
        # Summarize only the most recent THRESHOLD messages.
        summary = generate_summary(messages[-SNAPSHOT_THRESHOLD:])

        if summary and len(summary) > 50:  # Only snapshot if substantive
            try:
                # Get embedding from Ollama (local import keeps requests an
                # optional dependency for the rest of the script).
                import requests
                response = requests.post(
                    "http://localhost:11434/api/embeddings",
                    json={"model": "nomic-embed-text", "prompt": summary[:2000]},
                    timeout=30
                )
                embedding = response.json()["embedding"]

                # Store in database
                source_path = f"session://{session_id}#{datetime.now().strftime('%H:%M')}"
                store_memory(
                    source_type="auto_session",
                    source_path=source_path,
                    content=summary,
                    embedding=embedding
                )

                print(f"  [OK] Snapshot saved: {source_path}")
                since_snapshot = 0
                snapshot_created = True

            except Exception as e:
                # On failure the counter is NOT reset, so the snapshot is
                # retried on the next run.
                print(f"  [ERROR] Failed to create snapshot: {e}")
        else:
            print(f"  [SKIP] Content too short for snapshot")
            since_snapshot = 0  # Reset anyway to avoid getting stuck

    # Update tracking (always, so last_message_index advances even when no
    # snapshot was taken).
    cursor.execute('''
        UPDATE session_tracking
        SET last_message_index = ?,
            messages_since_snapshot = ?,
            last_checkpoint_time = CURRENT_TIMESTAMP,
            transcript_path = ?
        WHERE session_id = ?
    ''', (total_lines, since_snapshot, filepath, session_id))

    conn.commit()
    conn.close()

    return snapshot_created, new_count
|
||||
|
||||
def cleanup_old_sessions():
    """Delete transcript files not modified in the last 24 hours.

    Returns the number of files removed.
    """
    cutoff_time = datetime.now().timestamp() - 24 * 3600  # 24 hours ago
    deleted_count = 0

    for transcript in Path(SESSIONS_DIR).glob("*.jsonl"):
        # Keep anything touched since the cutoff.
        if transcript.stat().st_mtime >= cutoff_time:
            continue
        try:
            transcript.unlink()
            deleted_count += 1
            print(f"  [CLEANUP] Deleted: {transcript.name}")
        except Exception as e:
            # Best-effort cleanup: report and move on.
            print(f"  [CLEANUP] Failed to delete {transcript.name}: {e}")

    return deleted_count
|
||||
|
||||
def main():
    """Run one monitoring pass: snapshot new messages, prune old transcripts."""
    bar = '=' * 60
    print(f"\n{bar}")
    print("Session Monitor")
    print(bar)
    print(f"Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    sessions = find_session_files()
    if not sessions:
        print("[INFO] No active session files found")
        return

    print(f"Found {len(sessions)} session file(s)")
    print()

    snapshot_total = 0
    message_total = 0
    for info in sessions:
        created, count = process_session(info)
        message_total += count
        if created:
            snapshot_total += 1

    # Prune transcripts older than 24 hours.
    deleted = cleanup_old_sessions()

    print()
    print(bar)
    print(f"Summary: {message_total} messages, {snapshot_total} snapshots, {deleted} files cleaned")
    print(bar + "\n")
|
||||
|
||||
# Script entry point — intended to be invoked directly by cron (see module docstring).
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user