Fresh start - excluded large ROM JSON files
This commit is contained in:
263
tools/session_monitor.py
Normal file
263
tools/session_monitor.py
Normal file
@@ -0,0 +1,263 @@
|
||||
"""
|
||||
Session Monitor
|
||||
===============
|
||||
Automatically tracks conversation transcripts and captures snapshots.
|
||||
Runs via cron every 2 minutes.
|
||||
|
||||
Reads OpenClaw session transcripts from:
|
||||
~/.openclaw/agents/main/sessions/*.jsonl
|
||||
|
||||
Usage:
|
||||
python session_monitor.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from memory_vector import store_memory
|
||||
|
||||
# SQLite database that stores memory snapshots and per-session tracking rows.
DB_PATH = os.path.expanduser("~/.openclaw/memory.db")
# Directory where OpenClaw writes per-session JSONL transcripts (see module docstring).
SESSIONS_DIR = os.path.expanduser("~/.openclaw/agents/main/sessions")
SNAPSHOT_THRESHOLD = 15  # Messages between snapshots
|
||||
|
||||
def get_db():
    """Open a connection to the memory database with dict-like row access."""
    connection = sqlite3.connect(DB_PATH)
    # sqlite3.Row lets callers index results by column name.
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
def find_session_files():
    """Locate all session transcript files, sorted most-recent-first by mtime."""
    if not os.path.exists(SESSIONS_DIR):
        return []

    records = [
        {
            'session_id': transcript.stem,  # filename without extension
            'path': str(transcript),
            'mtime': transcript.stat().st_mtime,
        }
        for transcript in Path(SESSIONS_DIR).glob("*.jsonl")
    ]
    # Newest transcripts first so active sessions are processed before stale ones.
    return sorted(records, key=lambda rec: rec['mtime'], reverse=True)
|
||||
|
||||
def parse_transcript(filepath, start_index=0):
    """
    Parse a JSONL transcript file and extract user messages.

    Args:
        filepath: Path to the .jsonl transcript.
        start_index: Line offset to resume from; lines before it are skipped
            so already-processed messages are not re-counted.

    Returns:
        (messages, total_lines): ``messages`` is a list of dicts with keys
        ``index`` (line number in the file), ``timestamp`` and ``text``;
        ``total_lines`` is the current number of lines in the file.
    """
    messages = []

    if not os.path.exists(filepath):
        return messages, 0

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        # Best-effort: an unreadable transcript is reported, not fatal.
        print(f"[ERROR] Cannot read {filepath}: {e}")
        return messages, 0

    total_lines = len(lines)

    # Process lines from start_index onwards
    for i, line in enumerate(lines[start_index:], start=start_index):
        if not line.strip():
            continue

        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            # Skip partial/corrupt lines (e.g. a write in progress).
            continue

        # Only count user messages
        if entry.get('type') != 'message':
            continue
        msg = entry.get('message', {})
        if msg.get('role') != 'user':
            continue

        content = msg.get('content', [])
        # BUG FIX: content may be a plain string rather than a list of parts.
        # The old code iterated the string character-by-character and joined
        # with spaces ("hi" -> "h i"); treat a string as a single text part.
        if isinstance(content, str):
            text_parts = [content] if content else []
        else:
            text_parts = []
            for part in content:
                if isinstance(part, dict) and part.get('type') == 'text':
                    text_parts.append(part.get('text', ''))
                elif isinstance(part, str):
                    text_parts.append(part)

        if text_parts:
            messages.append({
                'index': i,
                'timestamp': entry.get('timestamp'),
                'text': ' '.join(text_parts)
            })

    return messages, total_lines
|
||||
|
||||
def generate_summary(messages):
    """Join message texts into a single summary string for embedding.

    Returns None when there are no messages; otherwise at most 1000 chars.
    """
    if not messages:
        return None

    joined = ' | '.join(m['text'] for m in messages)
    # Cap the summary so the embedding prompt stays small.
    return joined[:1000]
|
||||
|
||||
def process_session(session_info):
    """
    Process a single session.

    Parses transcript lines added since the last run (tracked in the
    session_tracking table) and, once SNAPSHOT_THRESHOLD new user messages
    have accumulated, embeds a summary via the local Ollama server and stores
    it through store_memory().

    Args:
        session_info: dict with 'session_id' and 'path' keys
            (as produced by find_session_files()).

    Returns (snapshot_created, message_count).
    """
    conn = get_db()
    cursor = conn.cursor()

    session_id = session_info['session_id']
    filepath = session_info['path']

    # Get or create tracking record
    cursor.execute('''
        SELECT * FROM session_tracking WHERE session_id = ?
    ''', (session_id,))

    tracking = cursor.fetchone()

    if tracking is None:
        # New session: start counting from the beginning of the transcript.
        cursor.execute('''
            INSERT INTO session_tracking
            (session_id, transcript_path, last_message_index, messages_since_snapshot)
            VALUES (?, ?, 0, 0)
        ''', (session_id, filepath))
        conn.commit()
        last_index = 0
        since_snapshot = 0
    else:
        last_index = tracking['last_message_index']
        since_snapshot = tracking['messages_since_snapshot']

    # Parse new messages
    # NOTE: despite the column name, last_message_index stores a LINE count
    # (total_lines below), not a message count — parse_transcript resumes by line.
    messages, total_lines = parse_transcript(filepath, last_index)

    if not messages:
        conn.close()
        return False, 0

    new_count = len(messages)
    since_snapshot += new_count

    print(f"  [{session_id[:8]}...]: {new_count} new messages, {since_snapshot} since snapshot")

    snapshot_created = False

    # Check if threshold reached
    if since_snapshot >= SNAPSHOT_THRESHOLD:
        # Summarize only the most recent THRESHOLD messages.
        summary = generate_summary(messages[-SNAPSHOT_THRESHOLD:])

        if summary and len(summary) > 50:  # Only snapshot if substantive
            try:
                # Get embedding from Ollama (local import keeps requests an
                # optional dependency for the rest of the script).
                import requests
                response = requests.post(
                    "http://localhost:11434/api/embeddings",
                    json={"model": "nomic-embed-text", "prompt": summary[:2000]},
                    timeout=30
                )
                embedding = response.json()["embedding"]

                # Store in database
                source_path = f"session://{session_id}#{datetime.now().strftime('%H:%M')}"
                store_memory(
                    source_type="auto_session",
                    source_path=source_path,
                    content=summary,
                    embedding=embedding
                )

                print(f"  [OK] Snapshot saved: {source_path}")
                since_snapshot = 0
                snapshot_created = True

            except Exception as e:
                # On failure the counter is NOT reset, so the snapshot is
                # retried on the next run.
                print(f"  [ERROR] Failed to create snapshot: {e}")
        else:
            print(f"  [SKIP] Content too short for snapshot")
            since_snapshot = 0  # Reset anyway to avoid getting stuck

    # Update tracking (always, so last_message_index advances even when no
    # snapshot was taken).
    cursor.execute('''
        UPDATE session_tracking
        SET last_message_index = ?,
            messages_since_snapshot = ?,
            last_checkpoint_time = CURRENT_TIMESTAMP,
            transcript_path = ?
        WHERE session_id = ?
    ''', (total_lines, since_snapshot, filepath, session_id))

    conn.commit()
    conn.close()

    return snapshot_created, new_count
|
||||
|
||||
def cleanup_old_sessions():
    """Delete transcript files not modified in the last 24 hours.

    Returns the number of files removed.
    """
    cutoff_time = datetime.now().timestamp() - 24 * 3600  # 24 hours ago
    deleted_count = 0

    for transcript in Path(SESSIONS_DIR).glob("*.jsonl"):
        # Keep anything touched since the cutoff.
        if transcript.stat().st_mtime >= cutoff_time:
            continue
        try:
            transcript.unlink()
            deleted_count += 1
            print(f"  [CLEANUP] Deleted: {transcript.name}")
        except Exception as e:
            # Best-effort cleanup: report and move on.
            print(f"  [CLEANUP] Failed to delete {transcript.name}: {e}")

    return deleted_count
|
||||
|
||||
def main():
    """Run one monitoring pass: snapshot new messages, prune old transcripts."""
    bar = '=' * 60
    print(f"\n{bar}")
    print("Session Monitor")
    print(bar)
    print(f"Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    sessions = find_session_files()
    if not sessions:
        print("[INFO] No active session files found")
        return

    print(f"Found {len(sessions)} session file(s)")
    print()

    snapshot_total = 0
    message_total = 0
    for info in sessions:
        created, count = process_session(info)
        message_total += count
        if created:
            snapshot_total += 1

    # Prune transcripts older than 24 hours.
    deleted = cleanup_old_sessions()

    print()
    print(bar)
    print(f"Summary: {message_total} messages, {snapshot_total} snapshots, {deleted} files cleaned")
    print(bar + "\n")
|
||||
|
||||
# Script entry point — intended to be invoked directly by cron (see module docstring).
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user