Fresh start - excluded large ROM JSON files

This commit is contained in:
OpenClaw Agent
2026-04-11 09:45:12 -05:00
commit 5deb387aa6
395 changed files with 47744 additions and 0 deletions

102
tools/bulk_memory_loader.py Normal file
View File

@@ -0,0 +1,102 @@
"""
Bulk Memory Loader
================
Loads all historical memory files into vector database.
"""
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import glob
from datetime import datetime
from memory_vector import setup_memory_vectors, store_memory, MemoryVectorDB
from tools.memory_embedding_worker import process_memory_file
def get_all_memory_files():
    """Collect every memory file that should be bulk-loaded.

    Returns:
        List of (filepath, source_type) tuples sorted by path
        descending; daily-note names embed ISO dates, so the newest
        notes come first.
    """
    root = os.path.expanduser("~/.openclaw/workspace")
    collected = []

    # Daily notes: memory/202*.md, restricted to 2025/2026 files.
    daily_glob = os.path.join(root, "memory", "202*.md")
    collected.extend(
        (path, "daily")
        for path in glob.glob(daily_glob)
        if os.path.basename(path).startswith(("2025-", "2026-"))
    )

    # The long-term MEMORY.md file, when present.
    long_term = os.path.join(root, "MEMORY.md")
    if os.path.exists(long_term):
        collected.append((long_term, "memory_md"))

    # Per-project notes under memory/projects/.
    collected.extend(
        (path, "project")
        for path in glob.glob(os.path.join(root, "memory", "projects", "*.md"))
    )

    # Reverse lexicographic order == newest daily notes first.
    return sorted(collected, key=lambda item: item[0], reverse=True)
def bulk_load():
    """Embed every historical memory file into the vector database.

    Returns:
        Dict with counts: processed files, embedding entries created,
        and number of files that raised during processing.
    """
    banner = "=" * 60
    print(banner)
    print("BULK MEMORY LOADER")
    print(banner)
    print(f"Started: {datetime.now().strftime('%H:%M:%S')}")
    print()

    # Make sure the vector store schema exists before writing.
    setup_memory_vectors()
    print("[OK] Database ready\n")

    files = get_all_memory_files()
    print(f"Found {len(files)} files to process:\n")

    # Preview only the first ten entries to keep the log short.
    for path, kind in files[:10]:
        print(f" - {os.path.basename(path)} ({kind})")
    if len(files) > 10:
        print(f" ... and {len(files) - 10} more\n")

    total_entries = 0
    failed_files = []
    for index, (filepath, source_type) in enumerate(files, 1):
        print(f"\n[{index}/{len(files)}] {os.path.basename(filepath)}")
        try:
            created = process_memory_file(filepath, source_type)
        except Exception as exc:
            # A single bad file must not abort the whole bulk load.
            print(f" FAILED: {exc}")
            failed_files.append(filepath)
        else:
            total_entries += created
            print(f" Created {created} entries")

    print("\n" + banner)
    print("BULK LOAD COMPLETE")
    print(banner)
    print(f"Files processed: {len(files)}")
    print(f"Total embeddings: {total_entries}")
    if failed_files:
        print(f"Failed files: {len(failed_files)}")
        for path in failed_files:
            print(f" - {path}")
    print(f"Finished: {datetime.now().strftime('%H:%M:%S')}")
    print(banner)

    return {
        "files": len(files),
        "entries": total_entries,
        "failed": len(failed_files)
    }


if __name__ == "__main__":
    bulk_load()

6
tools/check_cron.py Normal file
View File

@@ -0,0 +1,6 @@
import sqlite3

# Location of the OpenClaw cron scheduler database.
DB_PATH = r'C:\Users\admin\.openclaw\cron.db'


def list_tables(db_path=DB_PATH):
    """Return the names of all tables in the cron database.

    Args:
        db_path: Path to the SQLite database file (defaults to the
            OpenClaw cron DB).

    Returns:
        List of table-name strings.
    """
    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        # FIX: the original query was mis-quoted ('''...type=''table''''')
        # and produced the invalid SQL "WHERE type=''table"; a plain
        # double-quoted Python string with single-quoted SQL literal works.
        cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
        return [row[0] for row in cur.fetchall()]
    finally:
        # Always release the handle, even if the query fails.
        conn.close()


if __name__ == "__main__":
    for name in list_tables():
        print(name)

View File

@@ -0,0 +1,249 @@
"""
Memory Embedding Worker
=======================
Process memory files and store with embeddings in SQLite.
Runs as cron job or standalone.
Usage:
python memory_embedding_worker.py [--date YYYY-MM-DD]
"""
import os
import sys
# Add parent dir to path for memory_vector
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import json
import re
import requests
from datetime import datetime, timedelta
from typing import List, Tuple, Optional
from memory_vector import setup_memory_vectors, store_memory, MemoryVectorDB
# Ollama configuration
OLLAMA_URL = "http://localhost:11434"
EMBED_MODEL = "nomic-embed-text"
CHUNK_SIZE = 512 # Characters per chunk
def generate_embedding(text: str) -> Optional[List[float]]:
    """Embed *text* via the local Ollama server.

    Args:
        text: Text to embed (truncated to 8192 characters).

    Returns:
        The embedding vector, or None when the request or decoding fails.
    """
    payload = {
        "model": EMBED_MODEL,
        "prompt": text[:8192]  # Truncate if too long
    }
    try:
        resp = requests.post(
            f"{OLLAMA_URL}/api/embeddings",
            json=payload,
            timeout=60
        )
        resp.raise_for_status()
        return resp.json()["embedding"]
    except Exception as e:
        print(f"[ERROR] Failed to generate embedding: {e}")
        return None
def chunk_text(text: str, max_chars: Optional[int] = None) -> List[str]:
    """Split *text* into chunks of at most *max_chars* characters.

    Paragraphs (blank-line separated) are packed greedily into chunks.
    BUG FIX: a single paragraph longer than *max_chars* is now
    hard-split; previously it was emitted whole, producing oversized
    chunks.

    Args:
        text: Text to split.
        max_chars: Chunk budget; defaults to CHUNK_SIZE (resolved at
            call time so the module constant is honored).

    Returns:
        At least one chunk, even for empty input.
    """
    if max_chars is None:
        max_chars = CHUNK_SIZE
    chunks: List[str] = []
    current = ""
    for para in text.split('\n\n'):
        # Hard-split paragraphs that can never fit in one chunk.
        while len(para) > max_chars:
            if current:
                chunks.append(current.strip())
                current = ""
            piece = para[:max_chars].strip()
            if piece:
                chunks.append(piece)
            para = para[max_chars:]
        if len(current) + len(para) + 2 <= max_chars:
            current += f"\n\n{para}" if current else para
        else:
            if current:
                chunks.append(current.strip())
            current = para
    if current:
        chunks.append(current.strip())
    return chunks if chunks else [text[:max_chars]]
def extract_sections(content: str) -> List[Tuple[str, str]]:
    """Extract (title, body) pairs from ##/### markdown headers.

    Bodies of 50 characters or fewer are discarded; when nothing
    qualifies and the document is non-empty, the whole document is
    returned as a single "General" section.
    """
    header_pattern = r'#{2,3}\s+(.+?)\n(.*?)(?=#{2,3}\s+|\Z)'
    found = [
        (m.group(1).strip(), m.group(2).strip())
        for m in re.finditer(header_pattern, content, re.DOTALL)
    ]
    sections = [(title, body) for title, body in found
                if body and len(body) > 50]
    if not sections and content.strip():
        return [("General", content.strip())]
    return sections
def process_memory_file(filepath: str, source_type: str) -> int:
    """Embed one memory file and persist its chunks.

    Args:
        filepath: Path to the memory file.
        source_type: Origin tag such as 'daily', 'memory_md', 'project'.

    Returns:
        Number of embedding entries stored (0 for a missing file).
    """
    if not os.path.exists(filepath):
        print(f"[SKIP] File not found: {filepath}")
        return 0

    with open(filepath, 'r', encoding='utf-8', errors='replace') as handle:
        content = handle.read()

    # The stored source reference uses just the filename.
    source_path = os.path.basename(filepath)
    sections = extract_sections(content)
    print(f"[PROCESS] {source_path}: {len(sections)} sections found")

    stored = 0
    for section_title, section_content in sections:
        chunks = chunk_text(section_content)
        multi = len(chunks) > 1
        for index, chunk in enumerate(chunks, 1):
            embedding = generate_embedding(chunk)
            if not embedding:
                print(f" [FAILED] Chunk {index}/{len(chunks)}")
                continue
            # Only label individual chunks when a section was split.
            label = (f"{section_title} (chunk {index}/{len(chunks)})"
                     if multi else section_title)
            store_memory(
                source_type=source_type,
                source_path=f"{source_path}#{label}",
                content=chunk[:500],  # Store preview
                embedding=embedding
            )
            stored += 1
            print(f" [STORED] Chunk {index}/{len(chunks)}")
    return stored
def get_memory_files(date: Optional[str] = None) -> List[Tuple[str, str]]:
    """Select which memory files this sync run should process.

    Args:
        date: Explicit date (YYYY-MM-DD) to process; when None, the
            worker targets yesterday's note plus today's if present.

    Returns:
        List of (filepath, source_type) tuples.
    """
    workspace = os.path.expanduser("~/.openclaw/workspace")
    memory_dir = os.path.join(workspace, "memory")
    selected: List[Tuple[str, str]] = []

    def daily_note(day: str) -> str:
        # Daily notes live at memory/<YYYY-MM-DD>.md.
        return os.path.join(memory_dir, f"{day}.md")

    if date:
        # Caller asked for one specific day only.
        explicit = daily_note(date)
        if os.path.exists(explicit):
            selected.append((explicit, "daily"))
    else:
        # Default: yesterday's note, plus today's in case the worker
        # runs during the day.
        yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
        yesterday_path = daily_note(yesterday)
        if os.path.exists(yesterday_path):
            selected.append((yesterday_path, "daily"))
        today_path = daily_note(datetime.now().strftime("%Y-%m-%d"))
        if os.path.exists(today_path) and today_path != yesterday_path:
            selected.append((today_path, "daily"))

    # MEMORY.md is included only when touched within the last 24 hours.
    memory_md = os.path.join(workspace, "MEMORY.md")
    if os.path.exists(memory_md):
        modified = datetime.fromtimestamp(os.path.getmtime(memory_md))
        if datetime.now() - modified < timedelta(hours=24):
            selected.append((memory_md, "memory_md"))
    return selected
def run_daily_sync(date: Optional[str] = None, dry_run: bool = False) -> dict:
    """Run the daily memory embedding sync.

    Args:
        date: Specific date to process (YYYY-MM-DD), or None for yesterday.
        dry_run: If True, don't actually store embeddings.

    Returns:
        Stats dict: {"files": int, "entries": int, "failed": int}.
    """
    print("=" * 50)
    print("Memory Embedding Worker")
    print("=" * 50)

    # Setup database (skipped when only testing).
    if not dry_run:
        setup_memory_vectors()
        print("[OK] Database ready\n")
    else:
        print("[DRY RUN] No database changes\n")

    files = get_memory_files(date)
    if not files:
        print("[INFO] No memory files to process")
        return {"files": 0, "entries": 0, "failed": 0}

    print(f"[INFO] Processing {len(files)} file(s):\n")
    for path, source_type in files:
        print(f" - {path} ({source_type})")
    print()

    stats = {"files": len(files), "entries": 0, "failed": 0}
    for filepath, source_type in files:
        print(f"\n[FILE] {os.path.basename(filepath)}")
        try:
            entries = process_memory_file(filepath, source_type)
        except Exception as exc:
            # BUG FIX: stats["failed"] was initialized but never updated,
            # and a single bad file aborted the whole sync. Mirror the
            # per-file error handling used by bulk_memory_loader.
            print(f" FAILED: {exc}")
            stats["failed"] += 1
        else:
            stats["entries"] += entries
            print(f" Created {entries} embedding entries")

    # Summary
    print("\n" + "=" * 50)
    print("SUMMARY")
    print("=" * 50)
    print(f"Files processed: {stats['files']}")
    print(f"Embedding entries: {stats['entries']}")
    if stats["failed"]:
        print(f"Failed files: {stats['failed']}")
    return stats
def main():
    """CLI entry point: parse arguments and run the sync."""
    import argparse

    parser = argparse.ArgumentParser(description='Memory Embedding Worker')
    parser.add_argument('--date', help='Process specific date (YYYY-MM-DD)')
    parser.add_argument('--dry-run', action='store_true', help='Test without storing')
    options = parser.parse_args()

    stats = run_daily_sync(date=options.date, dry_run=options.dry_run)
    # Exit 0 only when at least one embedding entry was created.
    return 0 if stats["entries"] > 0 else 1


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""Simple Proton Mail Bridge IMAP test - clean version"""
import socket
# Proton Mail Bridge runs as a local IMAP proxy on this host/port.
IMAP_HOST = "127.0.0.1"
IMAP_PORT = 1143
USERNAME = "alexthenerdyai@proton.me"
# SECURITY: hardcoded password committed to source control — rotate this
# credential and load it from an environment variable or OS keyring.
PASSWORD = "Is413#dfslw@alex"
def get_response(sock, tag):
    """Accumulate socket data until *tag* appears, then decode it.

    Args:
        sock: Connected socket to read from.
        tag: IMAP completion tag (e.g. "A1 OK") that terminates the read.

    Returns:
        Everything received so far, decoded as a string. Stops early on
        EOF (peer closed the connection).
    """
    needle = tag.encode()
    buffered = b""
    while needle not in buffered:
        chunk = sock.recv(4096)
        if not chunk:
            break
        buffered += chunk
    return buffered.decode()
def main():
    """Smoke-test the local Proton Bridge IMAP endpoint.

    Linear protocol conversation: greeting -> LOGIN -> LSUB ->
    EXAMINE INBOX -> SEARCH UNSEEN -> LOGOUT, printing each stage.
    """
    print("=" * 50)
    print("Proton Mail Bridge - IMAP Test")
    print("=" * 50)
    # Connect
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(10)
    sock.connect((IMAP_HOST, IMAP_PORT))
    # Greeting
    greeting = sock.recv(1024).decode()
    print(f"\n[CONNECT] {greeting.strip()[:80]}...")
    # Login
    sock.send(f'A1 LOGIN "{USERNAME}" "{PASSWORD}"\r\n'.encode())
    resp = get_response(sock, "A1 OK")
    print("[LOGIN] Success!")
    # List folders with LSUB (subscribed mailboxes only)
    sock.send(b'A2 LSUB "" "*"\r\n')
    resp = get_response(sock, "A2 OK")
    print("\n[FOLDERS]")
    folders = []
    for line in resp.split('\r\n'):
        if 'LSUB' in line and '/' in line:
            # Folder name is the last '"/"'-delimited token, quotes stripped.
            folder = line.split('"/"')[-1].strip().strip('"')
            folders.append(folder)
            print(f" - {folder}")
    # Check INBOX (EXAMINE = read-only SELECT)
    sock.send(b'A3 EXAMINE "INBOX"\r\n')
    resp = get_response(sock, "A3 OK")
    # Count messages via the untagged "* N EXISTS" response
    exists = 0
    for line in resp.split('\r\n'):
        if 'EXISTS' in line:
            exists = int(line.split()[1])
            break
    print(f"\n[INBOX] {exists} total messages")
    # Check unread
    sock.send(b'A4 SEARCH UNSEEN\r\n')
    resp = get_response(sock, "A4 OK")
    unseen_count = 0
    for line in resp.split('\r\n'):
        if 'SEARCH' in line and '*' in line:
            parts = line.split('SEARCH')
            if len(parts) > 1:
                unseen = parts[1].strip().split()
                # An empty "* SEARCH" result yields [''] -> count 0
                unseen_count = len(unseen) if unseen and unseen[0] else 0
    print(f"[INBOX] {unseen_count} unread messages")
    # Logout
    sock.send(b'A5 LOGOUT\r\n')
    print("\n[LOGOUT] Done!")
    sock.close()


if __name__ == "__main__":
    main()

109
tools/proton_imap_test.py Normal file
View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python3
"""Quick test of Proton Mail Bridge IMAP connectivity"""
import socket
import ssl
import base64
# Proton Bridge settings (the bridge runs as a local IMAP proxy)
IMAP_HOST = "127.0.0.1"
IMAP_PORT = 1143
USERNAME = "alexthenerdyai@proton.me"
# SECURITY: bridge password hardcoded in source — rotate it and read it
# from an environment variable or OS keyring instead of committing it.
PASSWORD = "8yiNBTJBMc6HyOQjIZKjMw"
def send_command(sock, cmd):
    """Send one IMAP command and collect its reply.

    Appends CRLF to *cmd*, then reads until a chunk containing CRLF
    arrives, the peer closes, or a 5-second read timeout fires.

    Returns:
        The decoded reply (possibly empty/partial on timeout).
    """
    sock.send(f"{cmd}\r\n".encode())
    reply = b""
    while True:
        sock.settimeout(5)
        try:
            chunk = sock.recv(4096)
        except socket.timeout:
            break
        if not chunk:
            break
        reply += chunk
        # Heuristic: a CRLF anywhere in the chunk ends the read.
        if b"\r\n" in chunk:
            break
    return reply.decode()
def main():
    """Verbose IMAP connectivity test against the local Proton Bridge.

    Sequence: connect -> LOGIN -> LSUB/LIST -> EXAMINE INBOX ->
    SEARCH UNSEEN -> LOGOUT, dumping raw responses along the way.
    """
    print("=" * 50)
    print("Proton Mail Bridge IMAP Test")
    print("=" * 50)
    # Connect to IMAP server
    print(f"\n[1] Connecting to {IMAP_HOST}:{IMAP_PORT}...")
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(10)
    try:
        sock.connect((IMAP_HOST, IMAP_PORT))
        greeting = sock.recv(1024).decode()
        print(f"[OK] Connected! Server: {greeting.strip()}")
    except Exception as e:
        print(f"[ERR] Connection failed: {e}")
        return
    # Authenticate with LOGIN
    print("\n[2] Authenticating...")
    response = send_command(sock, f'A1 LOGIN "{USERNAME}" "{PASSWORD}"')
    # NOTE(review): both success branches print the same message, so the
    # extra "completed" check is redundant with the plain "OK" check.
    if "OK" in response and "completed" in response.lower():
        print("[OK] Authentication successful!")
    elif "OK" in response:
        print("[OK] Authentication successful!")
    else:
        print(f"[ERR] Authentication failed: {response.strip()}")
        sock.close()
        return
    # List folders
    print("\n[3] Listing mailboxes...")
    # Try LSUB first (subscribed folders)
    response = send_command(sock, 'A2a LSUB "" "*"')
    print(f"LSUB response:\n{response}")
    response = send_command(sock, 'A2b LIST "" "*"')
    print(f"LIST response:\n{response}")
    folders = []
    # NOTE(review): this loop scans the LIST response for 'LSUB' markers,
    # so it likely never matches — presumably it was meant to parse the
    # LSUB response captured above; confirm before relying on `folders`.
    for line in response.split("\r\n"):
        if 'LSUB' in line:
            # Parse folder name from LSUB response
            if '"/' in line:
                folder = line.split('"/"')[-1].strip().strip('"')
            else:
                folder = line.split()[-1].strip().strip('"')
            folders.append(folder)
            print(f" [FOLDER] {folder}")
    # Check INBOX for unread
    print("\n[4] Checking INBOX...")
    response = send_command(sock, 'A3 EXAMINE "INBOX"')
    print(f"EXAMINE response: {response[:200]}")
    # Search for unread messages
    response = send_command(sock, 'A4 SEARCH UNSEEN')
    print(f"SEARCH response: {response[:200]}")
    if 'SEARCH' in response:
        # "* SEARCH 1 2 3" -> sequence numbers follow the keyword
        unseen = response.split('SEARCH')[1].strip().split()
        if unseen and unseen[0] != '':
            print(f"[OK] {len(unseen)} unread messages")
        else:
            print("[OK] No unread messages")
    else:
        print(" Could not check unread count")
    # Logout
    print("\n[5] Logging out...")
    send_command(sock, "A5 LOGOUT")
    print("[OK] Done!")
    sock.close()


if __name__ == "__main__":
    main()

238
tools/proton_read_email.py Normal file
View File

@@ -0,0 +1,238 @@
#!/usr/bin/env python3
"""Read emails from Proton Mail INBOX - Full parsing"""
import socket
import email
from email import policy
from email.parser import BytesParser
from html.parser import HTMLParser
# Local Proton Bridge IMAP endpoint used by this reader.
IMAP_HOST = "127.0.0.1"
IMAP_PORT = 1143
USERNAME = "alexthenerdyai@proton.me"
# SECURITY: bridge password hardcoded and committed — rotate it and load
# it from an environment variable or OS keyring instead.
PASSWORD = "8yiNBTJBMc6HyOQjIZKjMw"
def strip_html(html):
    """Convert an HTML email body to readable plain text.

    Drops style/script blocks, maps block-level close tags to newlines
    and table cells to tabs, strips remaining tags and @media blocks,
    collapses whitespace, and decodes HTML entities.

    Args:
        html: Raw HTML string.

    Returns:
        Cleaned plain-text string.
    """
    import re
    from html import unescape
    # Remove style/script tags and their contents. FIX: use a DOTALL
    # non-greedy body — the old [^<]* pattern stopped matching as soon
    # as the block contained a '<' (common in CSS), leaving it in place.
    html = re.sub(r'<(style|script)[^>]*>.*?</\1>', ' ', html,
                  flags=re.IGNORECASE | re.DOTALL)
    # Replace common block elements with newlines
    html = re.sub(r'</(p|div|h[1-6]|li|tr|br)>', '\n', html, flags=re.IGNORECASE)
    # Replace table cells with tabs
    html = re.sub(r'</(td|th)>', '\t', html, flags=re.IGNORECASE)
    # Remove all remaining HTML tags
    html = re.sub(r'<[^>]+>', ' ', html)
    # Remove CSS @media queries and blocks
    html = re.sub(r'@media[^{]*\{[^}]*\}', ' ', html)
    # Clean up whitespace
    html = re.sub(r'[\t ]+', ' ', html)
    html = re.sub(r'\n[\n ]+', '\n\n', html)
    # FIX: decode every HTML entity (the old code handled a fixed set),
    # then normalize typographic characters back to the ASCII forms the
    # original replacements produced.
    html = unescape(html)
    for fancy, plain in (('\xa0', ' '), ('\u2018', "'"), ('\u2019', "'"),
                         ('\u201c', '"'), ('\u201d', '"'), ('\u2026', '...')):
        html = html.replace(fancy, plain)
    return html.strip()
def get_response(sock, tag):
    """Read raw bytes from *sock* until *tag* appears or EOF.

    Args:
        sock: Connected socket.
        tag: IMAP completion tag string (e.g. "A4 OK").

    Returns:
        The accumulated bytes, undecoded — the FETCH payload may
        contain arbitrary message data.
    """
    marker = tag.encode()
    data = b""
    while marker not in data:
        chunk = sock.recv(16384)
        if not chunk:
            break
        data += chunk
    return data
def extract_rfc822(data):
    """Pull the raw RFC822 message bytes out of an IMAP FETCH response.

    Collects the lines between the "* N FETCH (... RFC822 ...)" header
    line and the tagged "A4/A5/A6 ... OK" completion line.

    FIX: removed the dead prelude that computed data.find(b'\\r\\n') —
    its value was never used, and its early b'' return duplicated the
    loop's natural result for data without CRLFs.

    Args:
        data: Full FETCH response as bytes.

    Returns:
        The message bytes, or b'' when no FETCH payload is present.
    """
    message_lines = []
    collecting = False
    for line in data.split(b'\r\n'):
        if b'FETCH' in line and b'RFC822' in line:
            # Payload starts after the FETCH header line itself.
            collecting = True
            continue
        if line.startswith((b'A4 ', b'A5 ', b'A6 ')) and b'OK' in line:
            # Tagged completion line terminates the payload.
            break
        if collecting:
            message_lines.append(line)
    return b'\r\n'.join(message_lines)
def parse_email_body(msg):
    """Extract readable body text from a parsed email message.

    Walks multipart messages, skipping attachments; HTML parts are
    converted to plain text via strip_html.

    Returns:
        List of (kind, text) tuples where kind is "text" or "html".
        Single-part messages are always tagged "text", even when the
        content was HTML (it is stripped first).
    """
    collected = []

    def decode_payload(part):
        # Decode using the declared charset, falling back to UTF-8.
        raw = part.get_payload(decode=True)
        if not raw:
            return None
        charset = part.get_content_charset() or 'utf-8'
        try:
            return raw.decode(charset, errors='replace')
        except Exception:
            return raw.decode('utf-8', errors='replace')

    if msg.is_multipart():
        for part in msg.walk():
            # Skip attachments entirely.
            if "attachment" in str(part.get("Content-Disposition", "")):
                continue
            text = decode_payload(part)
            if text is None:
                continue
            kind = part.get_content_type()
            if kind == "text/plain":
                collected.append(("text", text))
            elif kind == "text/html":
                collected.append(("html", strip_html(text)))
    else:
        text = decode_payload(msg)
        if text is not None:
            if msg.get_content_type() == "text/html":
                text = strip_html(text)
            collected.append(("text", text))
    return collected
def format_email(msg):
    """Render a parsed email as a human-readable text block.

    Shows From/To/Date/Subject headers followed by the best available
    body (plain text preferred over converted HTML), truncated to
    2000 characters.

    Returns:
        A single printable string.
    """
    divider = "=" * 60
    lines = [divider]
    # Headers, with placeholders for anything missing.
    lines.append(f"FROM: {msg['From'] or '(Unknown)'}")
    lines.append(f"TO: {msg['To'] or '(Unknown)'}")
    lines.append(f"DATE: {msg['Date'] or '(No Date)'}")
    lines.append(f"SUBJECT: {msg['Subject'] or '(No Subject)'}")
    lines.append(divider)

    body_parts = parse_email_body(msg)
    # Prefer plain text, but use HTML if that's all we have.
    chosen = next((text for kind, text in body_parts if kind == "text"), None)
    if not chosen and body_parts:
        chosen = body_parts[0][1]

    if chosen:
        lines.append("\nBODY:")
        lines.append("-" * 60)
        # Strip per-line whitespace and drop blank lines.
        cleaned = '\n'.join(row.strip() for row in chosen.split('\n'))
        cleaned = '\n'.join(filter(None, cleaned.split('\n')))
        lines.append(cleaned[:2000])  # Limit length
        if len(cleaned) > 2000:
            lines.append("\n[... message truncated ...]")
    else:
        lines.append("\n[No readable body content]")
    return '\n'.join(lines)
def read_message(sock, msg_num):
    """Fetch message *msg_num* over IMAP and print it formatted.

    Falls back to dumping the first 500 raw bytes when parsing fails.
    """
    sock.send(f'A4 FETCH {msg_num} RFC822\r\n'.encode())
    raw = extract_rfc822(get_response(sock, "A4 OK"))
    if not raw:
        print(f"[ERR] Could not extract message {msg_num}")
        return
    try:
        parsed = BytesParser(policy=policy.default).parsebytes(raw)
        print(format_email(parsed))
    except Exception as e:
        print(f"[ERR] Failed to parse: {e}")
        print("RAW:", raw[:500])
def main():
    """Read and display the oldest message in the Proton INBOX.

    Flow: connect -> LOGIN -> SELECT INBOX -> SEARCH ALL -> FETCH the
    first sequence number -> LOGOUT.
    """
    print("Proton Mail - Email Reader")
    print("=" * 60)
    # Connect
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(15)
    sock.connect((IMAP_HOST, IMAP_PORT))
    sock.recv(1024)  # Greeting
    # Login
    sock.send(f'A1 LOGIN "{USERNAME}" "{PASSWORD}"\r\n'.encode())
    get_response(sock, "A1 OK")
    print("[LOGIN] Success\n")
    # Select INBOX
    sock.send(b'A2 SELECT "INBOX"\r\n')
    get_response(sock, "A2 OK")
    # Get message list
    sock.send(b'A3 SEARCH ALL\r\n')
    resp = get_response(sock, "A3 OK").decode()
    msg_nums = []
    # The untagged "* SEARCH n1 n2 ..." line carries sequence numbers.
    for line in resp.split('\r\n'):
        if 'SEARCH' in line and '*' in line:
            parts = line.split('SEARCH')
            if len(parts) > 1:
                msg_nums = [n for n in parts[1].strip().split() if n.isdigit()]
    print(f"[INFO] {len(msg_nums)} messages in INBOX\n")
    # Read oldest (first) message
    if msg_nums:
        read_message(sock, msg_nums[0])
    # Logout
    sock.send(b'A5 LOGOUT\r\n')
    sock.close()
    print("\n" + "=" * 60)
    print("[DONE]")


if __name__ == "__main__":
    main()

81
tools/reminder-bot.py Normal file
View File

@@ -0,0 +1,81 @@
#!/usr/bin/env python3
"""
Discord Reminder Bot - Called by OpenClaw cron to deliver reminders
Uses the message tool to actually post to Discord
"""
import sqlite3
import os
import sys
import json
from datetime import datetime
DB_PATH = os.path.expanduser("~/.openclaw/workspace/data/reminders.db")
def get_due_reminders():
    """Return reminders that are due for delivery right now.

    Returns:
        List of (id, user_id, channel_id, message, remind_at) tuples for
        active reminders whose remind_at is at or before the current time.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        c = conn.cursor()
        c.execute('''SELECT id, user_id, channel_id, message, remind_at
                     FROM reminders
                     WHERE active = 1 AND remind_at <= datetime('now')''')
        return c.fetchall()
    finally:
        # FIX: close the connection even when the query raises
        # (previously the handle leaked on error).
        conn.close()
def mark_reminder_sent(reminder_id):
    """Deactivate a reminder so it is not delivered again.

    Args:
        reminder_id: Primary key of the reminder row.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute('UPDATE reminders SET active = 0 WHERE id = ?', (reminder_id,))
        conn.commit()
    finally:
        # FIX: close the connection even when the UPDATE raises
        # (previously the handle leaked on error).
        conn.close()
def format_reminder_message(message, user_id):
    """Build the Discord-formatted reminder line with a user mention."""
    mention = f"<@{user_id}>"
    return f"🔔 **Reminder for {mention}:** {message}"
if __name__ == "__main__":
    # Check for specific reminder ID (from cron)
    if len(sys.argv) > 1:
        reminder_id = int(sys.argv[1])
        conn = sqlite3.connect(DB_PATH)
        c = conn.cursor()
        c.execute('''SELECT id, user_id, channel_id, message, remind_at
                     FROM reminders WHERE id = ? AND active = 1''', (reminder_id,))
        result = c.fetchone()
        conn.close()
        if result:
            # Output JSON for the agent to use with message tool
            output = {
                "action": "send_reminder",
                "reminder_id": result[0],
                "user_id": result[1],
                "channel_id": result[2],
                "message": result[3],
                "formatted": format_reminder_message(result[3], result[1])
            }
            print(json.dumps(output))
        else:
            print(json.dumps({"error": "Reminder not found or already sent"}))
    else:
        # No argument: emit every currently-due reminder as JSON lines.
        # NOTE(review): reminders are only deactivated via
        # mark_reminder_sent(), which this path never calls — presumably
        # the consuming agent does that after delivery; confirm.
        due = get_due_reminders()
        if due:
            for r in due:
                output = {
                    "action": "send_reminder",
                    "reminder_id": r[0],
                    "user_id": r[1],
                    "channel_id": r[2],
                    "message": r[3],
                    "formatted": format_reminder_message(r[3], r[1])
                }
                print(json.dumps(output))
        else:
            print(json.dumps({"status": "no_due_reminders"}))

103
tools/reminder-handler.ps1 Normal file
View File

@@ -0,0 +1,103 @@
#!/usr/bin/env pwsh
<#
.SYNOPSIS
Discord Reminder Handler - Called by OpenClaw to process reminder commands
.DESCRIPTION
Handles: add, list, delete, cleanup
.EXAMPLE
.\reminder-handler.ps1 add "458667380332036117" "1474636036905631867" "Call mom" "2h"
.\reminder-handler.ps1 list
.\reminder-handler.ps1 delete 5
#>
# First positional argument selects the sub-command (add/list/delete/cleanup).
$action = $args[0]
# Resolve reminder-manager.py relative to this script's own directory.
$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
$pythonScript = Join-Path $scriptDir "reminder-manager.py"

# Fetch all active reminders from the Python manager as parsed JSON objects.
function Get-RemindersJson {
    $json = & python $pythonScript "list" | Out-String
    return $json | ConvertFrom-Json
}
# Dispatch on the requested sub-command; unknown commands print usage.
switch ($action) {
    "add" {
        $userId = $args[1]
        $channelId = $args[2]
        $message = $args[3]
        $time = $args[4]
        if (-not $userId -or -not $message -or -not $time) {
            Write-Error "Usage: add <user_id> <channel_id> <message> <time>"
            exit 1
        }
        # Add to database via the Python manager; it prints JSON.
        $result = & python $pythonScript "add" $userId $channelId $message $time | Out-String
        $reminder = $result | ConvertFrom-Json
        if ($reminder.error) {
            Write-Error $reminder.error
            exit 1
        }
        Write-Output "@{reminder_id:$($reminder.id)}"
    }
    "list" {
        $userId = $args[1]
        $reminders = Get-RemindersJson
        if ($userId) {
            $reminders = $reminders | Where-Object { $_.user_id -eq $userId }
        }
        if ($reminders.Count -eq 0) {
            Write-Output "No active reminders."
        } else {
            $reminders | ForEach-Object {
                # FIX: the format literal was previously '\at'; inside a
                # single-quoted section of a .NET custom date format the
                # backslash is rendered verbatim, so times printed as
                # "Mar 5 \at 3:00 PM". 'at' yields the intended text.
                $when = [datetime]::Parse($_.remind_at).ToString("MMM d 'at' h:mm tt")
                Write-Output "`#$($_.id): $($_.message) - $when"
            }
        }
    }
    "delete" {
        $id = $args[1]
        if (-not $id) {
            Write-Error "Usage: delete <reminder_id>"
            exit 1
        }
        $result = & python $pythonScript "delete" $id | Out-String
        $data = $result | ConvertFrom-Json
        if ($data.deleted) {
            Write-Output "Reminder#$id cancelled."
        } else {
            Write-Error "Reminder not found."
        }
    }
    "cleanup" {
        # Purge old rows; the Python manager prints its own status.
        & python $pythonScript "cleanup"
    }
    default {
        Write-Output @"
Discord Reminder Handler
Usage: reminder-handler.ps1 <command> [args]
Commands:
  add <user_id> <channel_id> <message> <time>   Add a reminder
  list [user_id]                                List reminders
  delete <id>                                   Cancel a reminder
  cleanup                                       Remove old entries
Time formats:
  20m, 2h, 1h30m   - Relative time
  9am, 2:30pm      - Today at time
  tomorrow 9am     - Tomorrow at time
"@
    }
}

270
tools/reminder-manager.py Normal file
View File

@@ -0,0 +1,270 @@
#!/usr/bin/env python3
"""
Reminder Manager for OpenClaw Discord
Handles one-shot and recurring reminders via OpenClaw cron + SQLite
"""
import sqlite3
import os
import sys
import json
import re
from datetime import datetime, timedelta
from pathlib import Path
DB_PATH = os.path.expanduser("~/.openclaw/workspace/data/reminders.db")
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
def init_db():
    """Create the reminders table if it does not already exist."""
    schema = '''CREATE TABLE IF NOT EXISTS reminders (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id TEXT NOT NULL,
                    channel_id TEXT NOT NULL,
                    message TEXT NOT NULL,
                    remind_at TEXT NOT NULL,
                    created_at TEXT DEFAULT CURRENT_TIMESTAMP,
                    is_recurring INTEGER DEFAULT 0,
                    recurrence_rule TEXT,
                    cron_job_id TEXT,
                    active INTEGER DEFAULT 1
                )'''
    conn = sqlite3.connect(DB_PATH)
    conn.cursor().execute(schema)
    conn.commit()
    conn.close()
def parse_time(time_str: str) -> datetime:
    """Parse a human-friendly time spec into a concrete datetime.

    Supported forms:
      - Relative: "20m", "2h", "1h30m"
      - "tomorrow" / "tomorrow 9am" / "tomorrow 14:00"
      - Today, rolling to tomorrow if already past: "9am", "2:30pm", "14:00"

    Raises:
        ValueError: when the string matches no known format.
    """
    now = datetime.now()
    spec = time_str.lower().strip()

    def to_24h(hour, ampm):
        # Map a 12-hour clock value to 24-hour; ampm None means as-is.
        if ampm == 'pm' and hour != 12:
            return hour + 12
        if ampm == 'am' and hour == 12:
            return 0
        return hour

    # Relative offsets: 20m, 2h, 1h30m
    m = re.match(r'^(\d+)m$', spec)
    if m:
        return now + timedelta(minutes=int(m.group(1)))
    m = re.match(r'^(\d+)h$', spec)
    if m:
        return now + timedelta(hours=int(m.group(1)))
    m = re.match(r'^(\d+)h(\d+)m$', spec)
    if m:
        return now + timedelta(hours=int(m.group(1)), minutes=int(m.group(2)))

    # Bare "tomorrow": same wall-clock time, one day later.
    if spec == 'tomorrow':
        return now + timedelta(days=1)

    # "tomorrow 9am" / "tomorrow 14:00"
    m = re.match(r'^tomorrow\s+([\d:]+)(am|pm)?$', spec)
    if m:
        clock, ampm = m.group(1), m.group(2)
        if ':' in clock:
            hour, minute = map(int, clock.split(':'))
        else:
            hour, minute = int(clock), 0
        base = now + timedelta(days=1)
        return base.replace(hour=to_24h(hour, ampm), minute=minute,
                            second=0, microsecond=0)

    # Today at "9:30am" / "14:00"; roll to tomorrow if already past.
    m = re.match(r'^(\d{1,2}):(\d{2})(am|pm)?$', spec)
    if m:
        target = now.replace(hour=to_24h(int(m.group(1)), m.group(3)),
                             minute=int(m.group(2)), second=0, microsecond=0)
        return target if target >= now else target + timedelta(days=1)

    # Today at "9am" / "12pm"; roll to tomorrow if already past.
    m = re.match(r'^(\d{1,2})(am|pm)$', spec)
    if m:
        target = now.replace(hour=to_24h(int(m.group(1)), m.group(2)),
                             minute=0, second=0, microsecond=0)
        return target if target >= now else target + timedelta(days=1)

    raise ValueError(f"Can't parse time: {spec}")
def add_reminder(user_id: str, channel_id: str, message: str, time_str: str) -> dict:
    """Insert a new reminder row.

    Args:
        user_id: Discord user to mention.
        channel_id: Channel where the reminder fires.
        message: Reminder text.
        time_str: Human time spec understood by parse_time().

    Returns:
        Dict describing the stored reminder, or {"error": ...} when the
        parsed time is already in the past. Scheduling the actual cron
        job is the caller's responsibility.
    """
    init_db()
    when = parse_time(time_str)
    if when < datetime.now():
        return {"error": "Reminder time is in the past"}

    # Insert into DB
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    cur.execute('''INSERT INTO reminders (user_id, channel_id, message, remind_at, active)
                   VALUES (?, ?, ?, ?, 1)''',
                (user_id, channel_id, message, when.isoformat()))
    new_id = cur.lastrowid
    conn.commit()
    conn.close()

    return {
        "id": new_id,
        "message": message,
        "remind_at": when.isoformat(),
        "user_id": user_id,
        "channel_id": channel_id
    }
def list_reminders(user_id: str = None) -> list:
    """Fetch active, future reminders.

    Args:
        user_id: When given, restrict results to this user (rows then
            omit the user_id column); otherwise all users' reminders are
            returned with user_id as the fifth column.

    Returns:
        List of row tuples ordered by remind_at ascending.
    """
    init_db()
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    if user_id:
        cur.execute('''SELECT id, message, remind_at, channel_id
                       FROM reminders
                       WHERE user_id = ? AND active = 1 AND remind_at > datetime('now')
                       ORDER BY remind_at''', (user_id,))
    else:
        cur.execute('''SELECT id, message, remind_at, channel_id, user_id
                       FROM reminders
                       WHERE active = 1 AND remind_at > datetime('now')
                       ORDER BY remind_at''')
    rows = cur.fetchall()
    conn.close()
    return rows
def delete_reminder(reminder_id: int, user_id: str = None) -> bool:
    """Remove a reminder row.

    Args:
        reminder_id: Row id to delete.
        user_id: When given, the row must also belong to this user.

    Returns:
        True when a row was actually deleted.
    """
    init_db()
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    if user_id:
        cur.execute('DELETE FROM reminders WHERE id = ? AND user_id = ?',
                    (reminder_id, user_id))
    else:
        cur.execute('DELETE FROM reminders WHERE id = ?', (reminder_id,))
    removed = cur.rowcount > 0
    conn.commit()
    conn.close()
    return removed
def delete_past_reminders():
    """Purge reminder rows more than one day past their fire time."""
    init_db()
    conn = sqlite3.connect(DB_PATH)
    try:
        # FIX: "now"/"-1 day" were double-quoted, which SQLite only
        # accepts via its deprecated double-quoted-string-literal
        # fallback; string literals belong in single quotes.
        conn.execute("DELETE FROM reminders WHERE remind_at < datetime('now', '-1 day')")
        conn.commit()
    finally:
        conn.close()
def cron_callback(reminder_id: int):
    """Look up a firing reminder and deactivate it.

    Args:
        reminder_id: Row id passed back by the cron job.

    Returns:
        Dict with message/user_id/channel_id, or None when the row
        does not exist.
    """
    init_db()
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    cur.execute('SELECT message, user_id, channel_id FROM reminders WHERE id = ?',
                (reminder_id,))
    row = cur.fetchone()
    if row:
        # One-shot semantics: deactivate as soon as it fires.
        cur.execute('UPDATE reminders SET active = 0 WHERE id = ?', (reminder_id,))
        conn.commit()
    conn.close()
    if row is None:
        return None
    message, user, channel = row
    return {
        "message": message,
        "user_id": user,
        "channel_id": channel
    }
if __name__ == "__main__":
    # Dispatch on the first CLI argument; default to the help text.
    command = sys.argv[1] if len(sys.argv) > 1 else "help"
    if command == "add":
        # Usage: reminder-manager.py add "user_id" "channel_id" "message" "time"
        user_id = sys.argv[2]
        channel_id = sys.argv[3]
        message = sys.argv[4]
        time_str = sys.argv[5]
        result = add_reminder(user_id, channel_id, message, time_str)
        print(json.dumps(result))
    elif command == "list":
        user_id = sys.argv[2] if len(sys.argv) > 2 else None
        reminders = list_reminders(user_id)
        # Rows have 4 columns when filtered by user, 5 otherwise.
        print(json.dumps([{
            "id": r[0],
            "message": r[1],
            "remind_at": r[2],
            "channel_id": r[3],
            "user_id": r[4] if len(r) > 4 else None
        } for r in reminders]))
    elif command == "delete":
        reminder_id = int(sys.argv[2])
        user_id = sys.argv[3] if len(sys.argv) > 3 else None
        deleted = delete_reminder(reminder_id, user_id)
        print(json.dumps({"deleted": deleted}))
    elif command == "callback":
        # Invoked by the cron job when a reminder fires.
        reminder_id = int(sys.argv[2])
        result = cron_callback(reminder_id)
        print(json.dumps(result) if result else "null")
    elif command == "cleanup":
        delete_past_reminders()
        print("Cleanup complete")
    else:
        print("""Usage:
  reminder-manager.py add "user_id" "channel_id" "message" "time"
  reminder-manager.py list [user_id]
  reminder-manager.py delete <reminder_id> [user_id]
  reminder-manager.py callback <reminder_id>
  reminder-manager.py cleanup
""")

111
tools/rom-full-scan.py Normal file
View File

@@ -0,0 +1,111 @@
import os
import hashlib
import json
from datetime import datetime
from pathlib import Path
ROM_ROOT = 'R:\\'
OUTPUT_DIR = Path('C:/Users/admin/.openclaw/workspace/rom-inventory')
HASH_LIMIT = 100 * 1024 * 1024
def get_file_hash(filepath, limit_bytes=None):
    """MD5-hash a file's contents (MD5 is for dedup, not security).

    Args:
        filepath: File to hash.
        limit_bytes: When set, hash only the first limit_bytes bytes.

    Returns:
        Hex digest string, or None when the file cannot be read.
    """
    hasher = hashlib.md5()
    try:
        with open(filepath, 'rb') as f:
            remaining = limit_bytes
            while True:
                # FIX: read in 8 KiB chunks even in limited mode — the
                # old code slurped up to limit_bytes (100 MB) in a
                # single allocation.
                size = 8192 if remaining is None else min(8192, remaining)
                if size <= 0:
                    break
                chunk = f.read(size)
                if not chunk:
                    break
                hasher.update(chunk)
                if remaining is not None:
                    remaining -= len(chunk)
        return hasher.hexdigest()
    except Exception:
        # Unreadable file (permissions, vanished, device error).
        return None
def scan_roms():
    """Walk the organized ROM tree, hashing files and collecting statistics.

    Writes a JSON report (stats, duplicate hash groups, capped file list)
    under OUTPUT_DIR and returns the stats dict.
    """
    print(f'Starting full ROM scan at {datetime.now()}')
    print(f'Root: {ROM_ROOT}')
    all_files = []   # per-file records: path/name/size/system/manufacturer
    hash_map = {}    # md5 hex digest -> list of paths with that digest
    stats = {
        'total_files': 0,
        'total_size': 0,
        'hashed_files': 0,
        'skipped_large': 0,   # files over HASH_LIMIT: counted, not hashed
        'errors': 0,
        'by_system': {},
        'by_manufacturer': {}
    }
    # Expected layout: <ROM_ROOT>\Rom Sets (Organized)\<Manufacturer>\<System>\...
    organized_path = Path(ROM_ROOT) / 'Rom Sets (Organized)'
    if organized_path.exists():
        for manufacturer_dir in organized_path.iterdir():
            if manufacturer_dir.is_dir():
                manufacturer = manufacturer_dir.name
                stats['by_manufacturer'][manufacturer] = {'files': 0, 'size': 0}
                for system_dir in manufacturer_dir.iterdir():
                    if system_dir.is_dir():
                        system = system_dir.name
                        # Same system name can appear under several manufacturers.
                        if system not in stats['by_system']:
                            stats['by_system'][system] = {'files': 0, 'size': 0}
                        for root, dirs, files in os.walk(system_dir):
                            for f in files:
                                filepath = Path(root) / f
                                try:
                                    size = filepath.stat().st_size
                                    all_files.append({
                                        'path': str(filepath),
                                        'name': f,
                                        'size': size,
                                        'system': system,
                                        'manufacturer': manufacturer
                                    })
                                    stats['total_files'] += 1
                                    stats['total_size'] += size
                                    stats['by_manufacturer'][manufacturer]['files'] += 1
                                    stats['by_manufacturer'][manufacturer]['size'] += size
                                    stats['by_system'][system]['files'] += 1
                                    stats['by_system'][system]['size'] += size
                                    # Only hash files at or below the size cap.
                                    if size <= HASH_LIMIT:
                                        file_hash = get_file_hash(filepath)
                                        if file_hash:
                                            all_files[-1]['hash'] = file_hash
                                            stats['hashed_files'] += 1
                                            if file_hash not in hash_map:
                                                hash_map[file_hash] = []
                                            hash_map[file_hash].append(str(filepath))
                                    else:
                                        stats['skipped_large'] += 1
                                except Exception as e:
                                    # Best-effort scan: count the failure, move on.
                                    stats['errors'] += 1
    # Any digest seen from more than one path is a duplicate group.
    duplicates = {h: files for h, files in hash_map.items() if len(files) > 1}
    print(f'\nScan complete at {datetime.now()}')
    print(f'Total files: {stats["total_files"]:,}')
    print(f'Total size: {stats["total_size"] / (1024**3):.2f} GB')
    print(f'Hashed: {stats["hashed_files"]:,}')
    print(f'Skipped (large): {stats["skipped_large"]:,}')
    print(f'Duplicates found: {len(duplicates)}')
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output = {
        'scan_date': datetime.now().isoformat(),
        'stats': stats,
        'duplicates': duplicates,
        'files': all_files[:10000]   # cap to keep the JSON file manageable
    }
    with open(OUTPUT_DIR / 'rom-full-scan.json', 'w') as f:
        json.dump(output, f, indent=2)
    print(f'\nResults saved to {OUTPUT_DIR / "rom-full-scan.json"}')
    return stats
if __name__ == '__main__':
    # Run the full scan when executed as a script.
    scan_roms()

156
tools/rom-inventory.py Normal file
View File

@@ -0,0 +1,156 @@
#!/usr/bin/env python3
"""ROM Library Inventory Scanner"""
import os
import json
import hashlib
from pathlib import Path
from datetime import datetime
from collections import defaultdict
# Root of the ROM drive.  NOTE: a raw string cannot end in a backslash
# (r"R:\" is a SyntaxError), so the backslash is escaped instead.
ROOT = "R:\\"
# Where inventory JSON reports are written.
OUTPUT_DIR = r"C:\Users\admin\.openclaw\workspace\rom-inventory"
def ensure_output_dir():
    """Create OUTPUT_DIR (with parents) if it does not already exist."""
    Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
def get_file_hash(filepath, block_size=65536):
    """Return the MD5 hex digest of *filepath*, or None if it cannot be read.

    The file is streamed in *block_size* chunks so large ROMs do not need to
    fit in memory.

    Fixes: a bare ``except:`` (which also swallows KeyboardInterrupt and
    SystemExit) narrowed to ``except OSError``.
    """
    try:
        hasher = hashlib.md5()
        with open(filepath, "rb") as f:
            for block in iter(lambda: f.read(block_size), b""):
                hasher.update(block)
        return hasher.hexdigest()
    except OSError:
        # Unreadable file: treat as "no hash" rather than failing the scan.
        return None
def get_file_size(filepath):
    """Return the size of *filepath* in bytes, or 0 if it is inaccessible.

    Fixes: bare ``except:`` narrowed to ``except OSError`` so only I/O
    failures map to the 0 sentinel.
    """
    try:
        return os.path.getsize(filepath)
    except OSError:
        return 0
# File extensions recognised as ROM images or ROM archives.
# NOTE(review): ".md" here presumably means Sega Mega Drive dumps, but it
# will also match Markdown files — confirm that is acceptable.
ROM_EXTENSIONS = {
    ".sfc", ".nes", ".z64", ".v64", ".n64", ".gba", ".gbc", ".gb",
    ".nds", ".3ds", ".iso", ".bin", ".cue", ".cso", ".pbp",
    ".rom", ".md", ".sms", ".gg", ".sg", ".col", ".vec",
    ".a26", ".a52", ".a78", ".lnx", ".j64", ".jag",
    ".pce", ".ngp", ".ngc", ".ws", ".wsc", ".vb",
    ".zip", ".7z", ".rar"
}

def is_rom_file(filename):
    """True when *filename*'s extension marks it as a ROM or ROM archive."""
    extension = os.path.splitext(filename)[1].lower()
    return extension in ROM_EXTENSIONS
def scan_console(console_path, console_name):
    """Inventory one console directory.

    Walks *console_path* recursively, recording every ROM-extension file's
    name, size, and (for files under 100 MB) MD5 hash.  Returns a summary
    dict; only the first 100 ROM records are retained to bound output size.
    """
    roms = []
    total_size = 0
    file_count = 0
    console_path = Path(console_path)
    if not console_path.exists():
        return {"console": console_name, "error": "Path not found"}
    for root, dirs, files in os.walk(console_path):
        for file in files:
            if is_rom_file(file):
                filepath = Path(root) / file
                size = get_file_size(filepath)
                total_size += size
                file_count += 1
                file_hash = None
                # Hash only files under 100 MB; larger files keep hash=None.
                if size < 100 * 1024 * 1024:
                    file_hash = get_file_hash(filepath)
                roms.append({
                    "name": file,
                    "path": str(filepath),
                    "size": size,
                    "hash": file_hash
                })
    return {
        "console": console_name,
        "file_count": file_count,
        "total_size_bytes": total_size,
        "total_size_gb": round(total_size / (1024**3), 2),
        "roms": roms[:100]   # cap the per-console detail list
    }
def scan_all():
    """Scan every manufacturer/console under "Rom Sets (Organized)".

    Builds a nested results dict, cross-references MD5 hashes to find
    duplicate ROMs across consoles, writes a timestamped JSON report into
    OUTPUT_DIR, and returns the results dict.

    Fixes: the original nested double quotes inside double-quoted f-strings
    and embedded raw newlines inside string literals — both syntax errors —
    now single quotes inside f-strings and explicit "\\n" escapes.
    """
    ensure_output_dir()
    organized_path = Path(ROOT) / "Rom Sets (Organized)"
    results = {
        "scan_date": datetime.now().isoformat(),
        "root": ROOT,
        "organized": {},
        "duplicates": [],
        "summary": {}
    }
    print("Scanning Rom Sets (Organized)...")
    for manufacturer in organized_path.iterdir():
        if manufacturer.is_dir():
            print(f"  {manufacturer.name}...")
            results["organized"][manufacturer.name] = {}
            for console in manufacturer.iterdir():
                if console.is_dir():
                    console_data = scan_console(console, console.name)
                    results["organized"][manufacturer.name][console.name] = console_data
                    print(f"    {console.name}: {console_data.get('file_count', 0)} files")
    print("\nFinding duplicates...")
    # Group every hashed ROM by digest; >1 entry per digest == duplicate.
    all_hashes = defaultdict(list)
    for manufacturer, consoles in results["organized"].items():
        for console, data in consoles.items():
            if "roms" in data:
                for rom in data["roms"]:
                    if rom.get("hash"):
                        all_hashes[rom["hash"]].append({
                            "name": rom["name"],
                            "console": console,
                            "path": rom["path"]
                        })
    results["duplicates"] = [
        {"hash": h, "files": files}
        for h, files in all_hashes.items()
        if len(files) > 1
    ]
    total_files = sum(
        data.get("file_count", 0)
        for consoles in results["organized"].values()
        for data in consoles.values()
    )
    total_size = sum(
        data.get("total_size_bytes", 0)
        for consoles in results["organized"].values()
        for data in consoles.values()
    )
    results["summary"] = {
        "total_roms": total_files,
        "total_size_gb": round(total_size / (1024**3), 2),
        "duplicate_count": len(results["duplicates"])
    }
    output_file = Path(OUTPUT_DIR) / f"rom-inventory-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
    with open(output_file, "w") as f:
        json.dump(results, f, indent=2)
    print("\n=== INVENTORY COMPLETE ===")
    print(f"Total ROMs: {total_files:,}")
    print(f"Total Size: {results['summary']['total_size_gb']:.2f} GB")
    print(f"Duplicates Found: {len(results['duplicates'])}")
    print(f"Output: {output_file}")
    return results
if __name__ == "__main__":
    # Run a full inventory when invoked directly.
    scan_all()

53
tools/rom-quick-scan.py Normal file
View File

@@ -0,0 +1,53 @@
import os
import json
from pathlib import Path
from datetime import datetime
# Root of the ROM drive and where the report JSON is written.
ROOT = "R:/"
OUTPUT = "C:/Users/admin/.openclaw/workspace/rom-inventory"
# Disc-based systems: each game lives in its own folder, so folders are
# counted instead of files (and no size is accumulated for them).
DISC = {"PSX", "PS2", "PS3", "PSP", "PS VITA", "Sega CD", "Sega Dreamcast", "Sega Saturn", "OG XBOX", "XBOX 360"}
# Cartridge ROM / archive extensions that count as games.
EXTS = {".sfc", ".nes", ".z64", ".v64", ".n64", ".gba", ".gbc", ".gb", ".nds", ".3ds", ".iso", ".bin", ".cue", ".md", ".sms", ".gg", ".zip"}
# Side effect at import time: make sure the report directory exists.
os.makedirs(OUTPUT, exist_ok=True)
def count_folders(p):
    """Count non-hidden immediate subdirectories of *p*."""
    total = 0
    for entry in Path(p).iterdir():
        if entry.is_dir() and not entry.name.startswith("."):
            total += 1
    return total
def count_files(p):
    """Recursively count ROM files (by EXTS extension) under *p*.

    Returns (count, total_bytes).  A file whose size cannot be read is
    still counted but contributes 0 bytes.

    Fixes: bare ``except:`` narrowed to ``except OSError``; unused walk
    variable renamed to ``_dirnames``.
    """
    count, total_bytes = 0, 0
    for dirpath, _dirnames, filenames in os.walk(p):
        for name in filenames:
            if Path(name).suffix.lower() in EXTS:
                count += 1
                try:
                    total_bytes += os.path.getsize(Path(dirpath) / name)
                except OSError:
                    pass
    return count, total_bytes
# --- main scan (flat script: this runs at import/execution time) ---
org = Path(ROOT) / "Rom Sets (Organized)"
data = {"date": datetime.now().isoformat(), "consoles": {}}
total = 0
size = 0
for mfr in org.iterdir():
    if mfr.is_dir():
        print(mfr.name)
        for con in mfr.iterdir():
            if con.is_dir():
                # Disc systems: one folder per game, so count folders and
                # skip size accounting entirely.
                if any(d in con.name for d in DISC):
                    n = count_folders(con)
                    data["consoles"][con.name] = {"type": "disc", "count": n}
                else:
                    # Cartridge systems: count ROM files and sum their bytes.
                    n, s = count_files(con)
                    data["consoles"][con.name] = {"type": "cart", "count": n, "size_gb": round(s/1e9, 2)}
                    size += s
                total += n
data["total"] = total
# NOTE: size_gb only includes cartridge systems; disc folders are not sized.
data["size_gb"] = round(size/1e9, 2)
out = Path(OUTPUT) / "rom-inventory.json"
with open(out, "w") as f:
    json.dump(data, f, indent=2)
print(f"Done: {total} items, {data['size_gb']} GB")
print(f"Saved: {out}")

102
tools/search_memories.py Normal file
View File

@@ -0,0 +1,102 @@
"""
Memory Vector Search CLI
========================
Search memories using semantic similarity.
Usage:
python search_memories.py "your query here"
python search_memories.py --interactive
"""
import sys
import os

# Make the workspace root importable before pulling in memory_vector.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import requests

from memory_vector import search_memories

# Local Ollama endpoint and the embedding model used for query vectors.
# Fixes: removed a duplicated ``import sys`` line.
OLLAMA_URL = "http://localhost:11434"
EMBED_MODEL = "nomic-embed-text"
def get_embedding(text: str) -> list:
    """Embed *text* via the local Ollama embeddings endpoint.

    Raises requests.HTTPError on a non-2xx response.
    """
    payload = {"model": EMBED_MODEL, "prompt": text}
    reply = requests.post(f"{OLLAMA_URL}/api/embeddings", json=payload, timeout=30)
    reply.raise_for_status()
    return reply.json()["embedding"]
def search(query: str, k: int = 5):
    """Embed *query*, run a k-nearest-neighbour search, and print matches."""
    print(f"Searching: '{query}'\n")
    print("Generating embedding...")
    vector = get_embedding(query)
    print(f"Searching {k} nearest neighbors...\n")
    hits = search_memories(vector, k=k)
    if not hits:
        print("No results found.")
        return
    print("=" * 60)
    print("RESULTS")
    print("=" * 60)
    for rank, (path, content, distance) in enumerate(hits, 1):
        # The Windows console may choke on non-ASCII; strip it defensively.
        safe_path = path.encode('ascii', 'ignore').decode() if path else "Unknown"
        safe_content = content.encode('ascii', 'ignore').decode() if content else ""
        print(f"\n{rank}. [{safe_path}]")
        print(f"   Distance: {distance:.4f}")
        print(f"   Content: {safe_content[:150]}...")
    print("\n" + "=" * 60)
    print(f"Found {len(hits)} result(s)")
def interactive():
    """REPL-style search loop; 'quit'/'exit'/'q' ends the session."""
    print("Memory Vector Search")
    print("Type 'quit' to exit\n")
    while True:
        query = input("Search: ").strip()
        if query.lower() in ('quit', 'exit', 'q'):
            return
        if not query:
            continue
        try:
            search(query)
        except Exception as exc:
            # Keep the loop alive on any search failure.
            print(f"Error: {exc}")
        print()
def main():
    """Parse CLI arguments and dispatch to one-shot or interactive search."""
    import argparse
    parser = argparse.ArgumentParser(description='Search memories with vector similarity')
    parser.add_argument('query', nargs='?', help='Search query')
    parser.add_argument('-n', '--num', type=int, default=5, help='Number of results (default: 5)')
    parser.add_argument('-i', '--interactive', action='store_true', help='Interactive mode')
    opts = parser.parse_args()
    # No query at all also falls back to interactive mode.
    if opts.interactive or not opts.query:
        interactive()
    else:
        search(opts.query, k=opts.num)
if __name__ == "__main__":
    # Script entry point.
    main()

263
tools/session_monitor.py Normal file
View File

@@ -0,0 +1,263 @@
"""
Session Monitor
===============
Automatically tracks conversation transcripts and captures snapshots.
Runs via cron every 2 minutes.
Reads OpenClaw session transcripts from:
~/.openclaw/agents/main/sessions/*.jsonl
Usage:
python session_monitor.py
"""
import os
import sys
import json
import sqlite3
from datetime import datetime
from pathlib import Path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from memory_vector import store_memory
# SQLite DB holding the session_tracking table (and memory storage).
DB_PATH = os.path.expanduser("~/.openclaw/memory.db")
# Where OpenClaw writes per-session JSONL transcripts.
SESSIONS_DIR = os.path.expanduser("~/.openclaw/agents/main/sessions")
SNAPSHOT_THRESHOLD = 15  # Messages between snapshots
def get_db():
    """Open the tracking database with dict-like row access."""
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
def find_session_files():
    """List transcript files in SESSIONS_DIR, newest-modified first.

    Each entry carries the session id (file stem), path, and mtime.
    Returns [] when the sessions directory does not exist.
    """
    if not os.path.exists(SESSIONS_DIR):
        return []
    found = [
        {
            'session_id': transcript.stem,   # filename without extension
            'path': str(transcript),
            'mtime': os.path.getmtime(transcript),
        }
        for transcript in Path(SESSIONS_DIR).glob("*.jsonl")
    ]
    found.sort(key=lambda item: item['mtime'], reverse=True)
    return found
def parse_transcript(filepath, start_index=0):
    """Read a JSONL transcript and collect user-authored text messages.

    Lines before *start_index* are skipped.  Returns (messages, total_lines)
    where each message dict has 'index', 'timestamp', and 'text'.  A missing
    file or unreadable/invalid lines yield no messages rather than raising.
    """
    collected = []
    if not os.path.exists(filepath):
        return collected, 0
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            lines = handle.readlines()
    except Exception as exc:
        print(f"[ERROR] Cannot read {filepath}: {exc}")
        return collected, 0
    total_lines = len(lines)
    for line_no, raw in enumerate(lines[start_index:], start=start_index):
        if not raw.strip():
            continue
        try:
            entry = json.loads(raw)
        except json.JSONDecodeError:
            continue
        if entry.get('type') != 'message':
            continue
        message = entry.get('message', {})
        if message.get('role') != 'user':
            continue
        # Content may be a list of {'type': 'text'} parts and/or bare strings.
        pieces = []
        for part in message.get('content', []):
            if isinstance(part, dict) and part.get('type') == 'text':
                pieces.append(part.get('text', ''))
            elif isinstance(part, str):
                pieces.append(part)
        if pieces:
            collected.append({
                'index': line_no,
                'timestamp': entry.get('timestamp'),
                'text': ' '.join(pieces)
            })
    return collected, total_lines
def generate_summary(messages):
    """Join message texts with ' | ', truncated to 1000 chars; None if empty."""
    if not messages:
        return None
    joined = ' | '.join(m['text'] for m in messages)
    return joined[:1000]
def process_session(session_info):
    """Ingest new transcript lines for one session and maybe snapshot.

    Reads messages added since the last recorded line index; once
    SNAPSHOT_THRESHOLD user messages accumulate, embeds a summary of the
    most recent batch via the local Ollama server and stores it as an
    'auto_session' memory.  Per-session state lives in session_tracking.

    Returns (snapshot_created, new_message_count).
    """
    conn = get_db()
    cursor = conn.cursor()
    session_id = session_info['session_id']
    filepath = session_info['path']
    # Get or create tracking record
    cursor.execute('''
        SELECT * FROM session_tracking WHERE session_id = ?
    ''', (session_id,))
    tracking = cursor.fetchone()
    if tracking is None:
        # New session: start counting from line 0.
        cursor.execute('''
            INSERT INTO session_tracking
            (session_id, transcript_path, last_message_index, messages_since_snapshot)
            VALUES (?, ?, 0, 0)
        ''', (session_id, filepath))
        conn.commit()
        last_index = 0
        since_snapshot = 0
    else:
        last_index = tracking['last_message_index']
        since_snapshot = tracking['messages_since_snapshot']
    # Parse only lines added since the last checkpoint.
    messages, total_lines = parse_transcript(filepath, last_index)
    if not messages:
        conn.close()
        return False, 0
    new_count = len(messages)
    since_snapshot += new_count
    print(f"  [{session_id[:8]}...]: {new_count} new messages, {since_snapshot} since snapshot")
    snapshot_created = False
    # Snapshot once enough user messages have piled up.
    if since_snapshot >= SNAPSHOT_THRESHOLD:
        summary = generate_summary(messages[-SNAPSHOT_THRESHOLD:])
        if summary and len(summary) > 50:  # Only snapshot if substantive
            try:
                # Get embedding from Ollama (prompt capped at 2000 chars).
                import requests
                response = requests.post(
                    "http://localhost:11434/api/embeddings",
                    json={"model": "nomic-embed-text", "prompt": summary[:2000]},
                    timeout=30
                )
                embedding = response.json()["embedding"]
                # Store under a synthetic session:// URI with a time anchor.
                source_path = f"session://{session_id}#{datetime.now().strftime('%H:%M')}"
                store_memory(
                    source_type="auto_session",
                    source_path=source_path,
                    content=summary,
                    embedding=embedding
                )
                print(f"  [OK] Snapshot saved: {source_path}")
                since_snapshot = 0
                snapshot_created = True
            except Exception as e:
                # Embedding/storage failure: counter is kept, so the next
                # cron run retries the snapshot.
                print(f"  [ERROR] Failed to create snapshot: {e}")
        else:
            print(f"  [SKIP] Content too short for snapshot")
            since_snapshot = 0  # Reset anyway to avoid getting stuck
    # Persist progress so the next run starts after these lines.
    cursor.execute('''
        UPDATE session_tracking
        SET last_message_index = ?,
            messages_since_snapshot = ?,
            last_checkpoint_time = CURRENT_TIMESTAMP,
            transcript_path = ?
        WHERE session_id = ?
    ''', (total_lines, since_snapshot, filepath, session_id))
    conn.commit()
    conn.close()
    return snapshot_created, new_count
def cleanup_old_sessions():
    """Delete transcript files not modified within the last 24 hours.

    Returns the number of files removed; per-file failures are logged and
    skipped.
    """
    cutoff = datetime.now().timestamp() - (24 * 3600)
    removed = 0
    for transcript in Path(SESSIONS_DIR).glob("*.jsonl"):
        if transcript.stat().st_mtime >= cutoff:
            continue
        try:
            transcript.unlink()
            removed += 1
            print(f"  [CLEANUP] Deleted: {transcript.name}")
        except Exception as exc:
            print(f"  [CLEANUP] Failed to delete {transcript.name}: {exc}")
    return removed
def main():
    """One monitoring pass: scan transcripts, snapshot, then clean up."""
    print(f"\n{'='*60}")
    print("Session Monitor")
    print(f"{'='*60}")
    print(f"Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()
    sessions = find_session_files()
    if not sessions:
        print("[INFO] No active session files found")
        return
    print(f"Found {len(sessions)} session file(s)")
    print()
    snapshots_taken = 0
    messages_seen = 0
    for info in sessions:
        took_snapshot, seen = process_session(info)
        messages_seen += seen
        if took_snapshot:
            snapshots_taken += 1
    # Prune transcripts older than the retention window.
    removed = cleanup_old_sessions()
    print()
    print(f"{'='*60}")
    print(f"Summary: {messages_seen} messages, {snapshots_taken} snapshots, {removed} files cleaned")
    print(f"{'='*60}\n")
if __name__ == "__main__":
    # Single pass; scheduling is handled externally by cron.
    main()

View File

@@ -0,0 +1,55 @@
"""
Session Memory Snapshotter
==========================
Auto-summarizes conversation and embeds it.
Called every 15 messages.
"""
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from datetime import datetime
from memory_vector import store_memory
import requests
# Local Ollama endpoint and embedding model used for snapshot vectors.
OLLAMA_URL = "http://localhost:11434"
EMBED_MODEL = "nomic-embed-text"
def get_embedding(text: str) -> list:
    """Embed (the first 2000 chars of) *text* via the local Ollama server."""
    reply = requests.post(
        f"{OLLAMA_URL}/api/embeddings",
        json={"model": EMBED_MODEL, "prompt": text[:2000]},
        timeout=30,
    )
    reply.raise_for_status()
    return reply.json()["embedding"]
def save_snapshot(summary: str, participants: str = "Corey, Alex"):
    """Embed *summary* and persist it as a 'session_snapshot' memory.

    Returns the synthetic session:// source path used as the record key.
    NOTE(review): *participants* is currently unused — kept for interface
    compatibility; confirm whether it should be stored with the snapshot.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    vector = get_embedding(summary)
    source_path = f"session://{datetime.now().strftime('%Y-%m-%d')}#{stamp}"
    store_memory(
        source_type="session_snapshot",
        source_path=source_path,
        content=summary,
        embedding=vector,
    )
    return source_path
if __name__ == "__main__":
    # CLI entry point: the first argument is the snapshot summary text.
    if len(sys.argv) < 2:
        print("Usage: python session_snapshotter.py 'summary text'")
        sys.exit(1)
    summary = sys.argv[1]
    path = save_snapshot(summary)
    print(f"[OK] Snapshot saved: {path}")

369
tools/shipping_tracker.py Normal file
View File

@@ -0,0 +1,369 @@
#!/usr/bin/env python3
"""Shipping Tracker - Checks Proton Mail for tracking emails"""
import socket
import sqlite3
import re
import json
import os
from email import policy
from email.parser import BytesParser
from datetime import datetime
from urllib.parse import urlparse
# Config
IMAP_HOST = "127.0.0.1"   # local bridge, not a remote IMAP server
IMAP_PORT = 1143
USERNAME = "alexthenerdyai@proton.me"
# SECURITY(review): credential hardcoded in source — move to an environment
# variable or secrets store and rotate this password.
PASSWORD = "8yiNBTJBMc6HyOQjIZKjMw"
DB_PATH = os.path.expanduser("~/.openclaw/workspace/data/shipping.db")
CHANNEL_ID = "1473701182076752135"   # Discord channel that receives alerts
# Tracking patterns
# Per carrier: regexes for its tracking-number formats, sender domains used
# for detection, and a display name.  NOTE(review): the bare digit patterns
# (FedEx/USPS/DHL/Amazon) can also match unrelated numbers in a mail body.
CARRIERS = {
    'ups': {
        'patterns': [r'1Z[A-Z0-9]{16}', r'\b1Z[0-9A-Z]{16}\b'],
        'domains': ['ups.com', 'email.ups.com'],
        'name': 'UPS'
    },
    'fedex': {
        'patterns': [r'\b\d{12}\b', r'\b\d{14}\b', r'\b\d{20}\b'],
        'domains': ['fedex.com', 'email.fedex.com'],
        'name': 'FedEx'
    },
    'usps': {
        'patterns': [r'\b\d{20,22}\b', r'\b9\d{15,21}\b', r'\b[A-Z]{2}\d{9}[A-Z]{2}\b'],
        'domains': ['usps.com', 'email.usps.gov'],
        'name': 'USPS'
    },
    'dhl': {
        'patterns': [r'\b\d{10}\b', r'\b\d{11}\b'],
        'domains': ['dhl.com', 'dhl-usa.com'],
        'name': 'DHL'
    },
    'amazon': {
        'patterns': [r'\b\d{12,14}\b'],
        'domains': ['amazon.com', 'amazon.ca', 'amazon.co.uk'],
        'name': 'Amazon Logistics'
    },
    'ontrac': {
        'patterns': [r'\bC\d{14}\b', r'\bD\d{14}\b'],
        'domains': ['ontrac.com'],
        'name': 'OnTrac'
    },
    'lasership': {
        'patterns': [r'\b1LS\d{12}\b', r'\bLX\d{8,12}\b'],
        'domains': ['lasership.com'],
        'name': 'LaserShip'
    }
}
# Date patterns for estimated delivery (matched against the lower-cased body).
DATE_PATTERNS = [
    r'(?:arriving|delivery|delivered by|estimated delivery)[\s:]*(?:on|by)?\s*(monday|tuesday|wednesday|thursday|friday|saturday|sunday)?[,\s]*([a-z]+)\s+(\d{1,2})[,\s]+(\d{4})',
    r'(?:arrives|delivery on|by)\s+(\w+)\s+(\d{1,2})',
    r'(?:expected|estimated|delivery)[\s:]*(\d{1,2})[\/\-](\d{1,2})[\/\-](\d{2,4})',
    r'(?:will arrive|delivered)\s+(?:on\s+)?(\w+day),?\s*(\w+)\s*(\d{1,2})',
]
def init_db():
    """Create the data directory and the shipments table if absent."""
    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
    db = sqlite3.connect(DB_PATH)
    db.execute('''
        CREATE TABLE IF NOT EXISTS shipments (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT UNIQUE,
            sender TEXT,
            sender_email TEXT,
            subject TEXT,
            carrier TEXT,
            tracking_number TEXT,
            estimated_delivery TEXT,
            tracking_url TEXT,
            received_at TEXT,
            notified_at TEXT,
            delivered INTEGER DEFAULT 0
        )
    ''')
    db.commit()
    db.close()
def get_response(sock, tag):
    """Accumulate socket data until *tag* appears (or the peer closes)."""
    buffered = b""
    while True:
        piece = sock.recv(16384)
        if not piece:
            return buffered
        buffered += piece
        if tag.encode() in buffered:
            return buffered
def extract_rfc822(data):
    """Pull the raw RFC822 message bytes out of an IMAP FETCH response.

    Collection starts after the line announcing the FETCH and stops at the
    tagged OK line (tags A4-A7 are the ones this script issues).
    """
    captured = []
    capturing = False
    for row in data.split(b'\r\n'):
        if b'FETCH' in row and b'RFC822' in row:
            capturing = True
            continue
        if b'OK' in row and row.startswith((b'A4 ', b'A5 ', b'A6 ', b'A7 ')):
            break
        if capturing:
            captured.append(row)
    return b'\r\n'.join(captured)
def strip_html(html):
    """Very rough HTML-to-text: drop style/script blocks, turn block-level
    closing tags into newlines, strip remaining tags, collapse whitespace."""
    import re
    text = re.sub(r'<(style|script)[^>]*>[^<]*</\1>', ' ', html, flags=re.IGNORECASE | re.DOTALL)
    text = re.sub(r'</(p|div|h[1-6]|li|tr|br)>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]+>', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
def parse_email(msg):
    """Flatten an email.message object into plain text.

    text/plain parts are used verbatim; text/html parts are stripped to
    text; all other parts are ignored.  Undecodable bytes are dropped.
    """
    body = ''
    if msg.is_multipart():
        for part in msg.walk():
            payload = part.get_payload(decode=True)
            if not payload:
                continue
            kind = part.get_content_type()
            if kind == 'text/plain':
                body += payload.decode('utf-8', errors='ignore')
            elif kind == 'text/html':
                body += strip_html(payload.decode('utf-8', errors='ignore'))
    else:
        payload = msg.get_payload(decode=True)
        if payload:
            body = payload.decode('utf-8', errors='ignore')
    return body
def detect_carrier(from_email, body, subject):
    """Guess a carrier id from the sender's domain, else from keywords.

    Returns a key of CARRIERS, or None when nothing matches.
    """
    domain = from_email.split('@')[-1].lower() if '@' in from_email else ''
    haystack = f"{subject} {body}".lower()
    for carrier_id, info in CARRIERS.items():
        # Sender domain match is the strongest signal.
        if any(known in domain for known in info['domains']):
            return carrier_id
        # Fall back to the carrier id or display name in the text.
        if carrier_id in haystack or info['name'].lower() in haystack:
            return carrier_id
    return None
def extract_tracking(body, carrier_id):
    """Find a tracking number in *body*.

    With a known carrier only that carrier's patterns are tried; otherwise
    every carrier's patterns are tried in declaration order.  Returns
    (number_or_None, carrier_id_or_None).
    """
    if carrier_id in CARRIERS:
        for pattern in CARRIERS[carrier_id]['patterns']:
            hit = re.search(pattern, body, re.IGNORECASE)
            if hit:
                return hit.group(0), carrier_id
        return None, carrier_id
    # Unknown carrier (including None): probe every pattern set.
    for candidate, info in CARRIERS.items():
        for pattern in info['patterns']:
            hit = re.search(pattern, body, re.IGNORECASE)
            if hit:
                return hit.group(0), candidate
    return None, None
def extract_delivery_date(body):
    """Return the first estimated-delivery phrase matched in *body*, or None.

    Matching runs each DATE_PATTERNS regex against the lower-cased body.

    Fixes: removed an unused local list of month names (dead code).
    """
    text = body.lower()
    for pattern in DATE_PATTERNS:
        match = re.search(pattern, text)
        if match:
            return match.group(0).strip()
    return None
def get_tracking_url(carrier, tracking_num):
    """Return the carrier's public tracking-page URL for *tracking_num*.

    None when the carrier has no known tracking page.
    """
    templates = {
        'ups': 'https://www.ups.com/track?tracknum={}',
        'fedex': 'https://www.fedex.com/fedextrack/?trknbr={}',
        'usps': 'https://tools.usps.com/go/TrackConfirmAction?tLabels={}',
        'dhl': 'https://www.dhl.com/en/express/tracking.html?AWB={}',
        'amazon': 'https://track.amazon.com/tracking/{}',
        'ontrac': 'https://www.ontrac.com/tracking/?number={}',
        'lasership': 'https://www.lasership.com/track/{}'
    }
    template = templates.get(carrier)
    return template.format(tracking_num) if template else None
def process_message(sock, msg_num, conn):
    """Fetch message *msg_num* over IMAP, parse it, and record any shipment.

    Returns a shipment summary dict when a new tracking number is found,
    otherwise None (already-seen message, unparsable mail, or no tracking
    number detected).  Deduplication key is the Message-ID header.
    """
    # Fetch full message
    sock.send(f'A4 FETCH {msg_num} RFC822\r\n'.encode())
    resp = get_response(sock, "A4 OK")
    email_data = extract_rfc822(resp)
    if not email_data:
        return None
    try:
        msg = BytesParser(policy=policy.default).parsebytes(email_data)
    except:
        # Malformed message: skip it rather than abort the whole poll.
        return None
    message_id = msg.get('Message-ID', '').strip('<>')
    if not message_id:
        return None
    # Check if already processed
    cursor = conn.execute('SELECT 1 FROM shipments WHERE message_id = ?', (message_id,))
    if cursor.fetchone():
        return None
    # Parse email headers and flatten the body to text.
    sender_email = msg.get('From', '')
    sender_name = sender_email.split('<')[0].strip() if '<' in sender_email else sender_email
    subject = msg.get('Subject', '(No Subject)')
    body = parse_email(msg)
    # Detect carrier and tracking number (carrier may be refined here).
    carrier_id = detect_carrier(sender_email, body, subject)
    tracking_num, carrier_id = extract_tracking(body, carrier_id)
    if not tracking_num:
        return None
    est_delivery = extract_delivery_date(body)
    tracking_url = get_tracking_url(carrier_id, tracking_num)
    # Save to database
    carrier_name = CARRIERS.get(carrier_id, {}).get('name', carrier_id.upper())
    conn.execute('''
        INSERT INTO shipments (message_id, sender, sender_email, subject, carrier,
                               tracking_number, estimated_delivery, tracking_url,
                               received_at, notified_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', (
        message_id, sender_name, sender_email, subject, carrier_name,
        tracking_num, est_delivery, tracking_url,
        datetime.now().isoformat(), datetime.now().isoformat()
    ))
    conn.commit()
    return {
        'sender': sender_name,
        'carrier': carrier_name,
        'tracking': tracking_num,
        'estimated': est_delivery or 'Unknown',
        'url': tracking_url,
        'subject': subject
    }
def format_notification(shipment):
    """Render a shipment dict as a Discord-markdown notification string."""
    text = (
        f"\ud83d\ude9a **New Shipment Detected**"
        "\n\n"
        f"**From:** {shipment['sender']}\n"
        f"**Carrier:** {shipment['carrier']}\n"
        f"**Tracking:** `{shipment['tracking']}`\n"
        f"**Est. Delivery:** {shipment['estimated']}"
    )
    # Only include the tracking link when a URL could be built.
    if shipment['url']:
        text += f"\n**Track:** <{shipment['url']}>"
    return text
def send_discord_notification(content, channel_id):
    """Placeholder notifier: logs a 100-char preview and reports success.

    Real delivery is handled elsewhere (sessions_send or a cron drop file).
    """
    preview = content[:100]
    print(f"[NOTIFY] Channel {channel_id}: {preview}...")
    return True
def main():
    """One polling cycle: log in over raw IMAP, scan unseen mail for
    tracking numbers, persist hits, and dump them to a JSON handoff file.

    Returns the list of newly found shipment dicts ([] on error).
    NOTE(review): this speaks IMAP by hand over a plaintext local socket;
    the A1-A5 tags must stay in sync with get_response/extract_rfc822.
    """
    init_db()
    conn = sqlite3.connect(DB_PATH)
    print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}] Checking for new shipments...")
    try:
        # Connect to IMAP
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(15)
        sock.connect((IMAP_HOST, IMAP_PORT))
        sock.recv(1024)   # consume the server greeting
        # Login
        sock.send(f'A1 LOGIN "{USERNAME}" "{PASSWORD}"\r\n'.encode())
        get_response(sock, "A1 OK")
        # Select INBOX
        sock.send(b'A2 SELECT "INBOX"\r\n')
        get_response(sock, "A2 OK")
        # Search unread messages
        sock.send(b'A3 SEARCH UNSEEN\r\n')
        resp = get_response(sock, "A3 OK").decode()
        # Untagged reply looks like: "* SEARCH 4 7 9"
        msg_nums = []
        for line in resp.split('\r\n'):
            if 'SEARCH' in line and '*' in line:
                parts = line.split('SEARCH')
                if len(parts) > 1:
                    msg_nums = [n for n in parts[1].strip().split() if n.isdigit()]
        if not msg_nums:
            print("[INFO] No unread messages")
            conn.close()
            return []
        print(f"[INFO] {len(msg_nums)} unread messages to check")
        shipments = []
        for msg_num in msg_nums:
            shipment = process_message(sock, msg_num, conn)
            if shipment:
                shipments.append(shipment)
                print(f"[FOUND] {shipment['carrier']} - {shipment['tracking']}")
        # Logout
        sock.send(b'A5 LOGOUT\r\n')
        sock.close()
        # Hand results to the Discord side via a JSON drop file.
        if shipments:
            output = {'channel': CHANNEL_ID, 'shipments': shipments}
            output_path = os.path.expanduser('~/.openclaw/workspace/data/shipping_output.json')
            with open(output_path, 'w') as f:
                json.dump(output, f)
            print(f"[INFO] Found {len(shipments)} shipments, saved to {output_path}")
        else:
            print("[INFO] No new shipments found")
        return shipments
    except Exception as e:
        print(f"[ERR] {e}")
        import traceback
        traceback.print_exc()
        return []
    finally:
        # Safe even after the early conn.close() above: closing a closed
        # sqlite3 connection is a no-op.
        conn.close()
if __name__ == "__main__":
    # Run one poll cycle, then print a human-readable notice per new shipment.
    shipments = main()
    if shipments:
        for s in shipments:
            print("\n" + format_notification(s))
View File

@@ -0,0 +1,231 @@
#!/usr/bin/env python3
"""Shipping Tracker with Discord notifications"""
import socket
import sqlite3
import re
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from email import policy
from email.parser import BytesParser
from datetime import datetime
# Config
IMAP_HOST = "127.0.0.1"   # local bridge, not a remote IMAP server
IMAP_PORT = 1143
USERNAME = "alexthenerdyai@proton.me"
# SECURITY(review): credential hardcoded in source — move to an environment
# variable or secrets store and rotate this password.
PASSWORD = "8yiNBTJBMc6HyOQjIZKjMw"
DB_PATH = os.path.expanduser("~/.openclaw/workspace/data/shipping.db")
CHANNEL_ID = "1473701182076752135"   # Discord channel that receives alerts
# Condensed carrier table: tracking-number regexes, sender domains, and
# display names.  NOTE(review): the bare digit patterns can also match
# unrelated numbers in a mail body.
CARRIERS = {
    'ups': {'patterns': [r'1Z[A-Z0-9]{16}'], 'domains': ['ups.com'], 'name': 'UPS'},
    'fedex': {'patterns': [r'\b\d{12,20}\b'], 'domains': ['fedex.com'], 'name': 'FedEx'},
    'usps': {'patterns': [r'\b\d{20,22}\b', r'\b9\d{15,21}\b'], 'domains': ['usps.com'], 'name': 'USPS'},
    'dhl': {'patterns': [r'\b\d{10,11}\b'], 'domains': ['dhl.com'], 'name': 'DHL'},
    'amazon': {'patterns': [r'\b\d{12,14}\b'], 'domains': ['amazon.com'], 'name': 'Amazon'},
}
def init_db():
    """Ensure the data directory and shipments table exist."""
    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
    db = sqlite3.connect(DB_PATH)
    db.execute('''CREATE TABLE IF NOT EXISTS shipments (
        id INTEGER PRIMARY KEY, message_id TEXT UNIQUE, sender TEXT, carrier TEXT,
        tracking_number TEXT, estimated_delivery TEXT, tracking_url TEXT,
        received_at TEXT, notified_at TEXT)''')
    db.commit()
    db.close()
def get_response(sock, tag):
    """Read from *sock* until *tag* appears (or the peer closes)."""
    accumulated = b""
    while True:
        piece = sock.recv(16384)
        if not piece:
            return accumulated
        accumulated += piece
        if tag.encode() in accumulated:
            return accumulated
def extract_rfc822(data):
    """Extract raw message bytes from an IMAP FETCH reply (tags A4/A5)."""
    collected = []
    active = False
    for row in data.split(b'\r\n'):
        if b'FETCH' in row and b'RFC822' in row:
            active = True
            continue
        if b'OK' in row and row.startswith((b'A4 ', b'A5 ')):
            break
        if active:
            collected.append(row)
    return b'\r\n'.join(collected)
def strip_html(html):
    """Crude HTML-to-text: drop style/script blocks, newline the block-level
    closers, strip the remaining tags, and collapse whitespace."""
    import re
    text = re.sub(r'<(style|script)[^>]*>[^<]*</\1>', ' ', html, flags=re.I|re.S)
    text = re.sub(r'</(p|div|h[1-6]|li|tr|br)>', '\n', text, flags=re.I)
    text = re.sub(r'<[^>]+>', ' ', text)
    return re.sub(r'\s+', ' ', text).strip()
def parse_email(msg):
    """Flatten *msg* into plain text (HTML parts stripped, others ignored)."""
    body = ''
    if not msg.is_multipart():
        payload = msg.get_payload(decode=True)
        return payload.decode('utf-8', errors='ignore') if payload else body
    for part in msg.walk():
        payload = part.get_payload(decode=True)
        if not payload:
            continue
        kind = part.get_content_type()
        if kind == 'text/plain':
            body += payload.decode('utf-8', errors='ignore')
        elif kind == 'text/html':
            body += strip_html(payload.decode('utf-8', errors='ignore'))
    return body
def detect_carrier(from_email, body, subject):
    """Guess a CARRIERS key from the sender domain, else from keywords."""
    domain = from_email.split('@')[-1].lower() if '@' in from_email else ''
    haystack = f"{subject} {body}".lower()
    for carrier_id, info in CARRIERS.items():
        if any(known in domain for known in info['domains']):
            return carrier_id
        if carrier_id in haystack or info['name'].lower() in haystack:
            return carrier_id
    return None
def extract_tracking(body, carrier_id):
    """Locate a tracking number in *body*.

    Returns (number_or_None, carrier_id_or_None); when the carrier is
    unknown every carrier's patterns are probed in declaration order.
    """
    if carrier_id in CARRIERS:
        for pattern in CARRIERS[carrier_id]['patterns']:
            hit = re.search(pattern, body, re.I)
            if hit:
                return hit.group(0), carrier_id
        return None, carrier_id
    for candidate, info in CARRIERS.items():
        for pattern in info['patterns']:
            hit = re.search(pattern, body, re.I)
            if hit:
                return hit.group(0), candidate
    return None, None
def extract_delivery(body):
    """Return the first delivery-estimate phrase found in *body*, or None."""
    patterns = [
        r'(?:arriving|delivery|delivered by|estimated delivery)[\s:]*(?:on|by)?\s*(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)?[,\s]*([a-z]+)\s+(\d{1,2})[,\s]+(\d{4})',
        r'(?:arrives|delivery on|by)\s+(\w+)\s+(\d{1,2})',
    ]
    lowered = body.lower()
    for pattern in patterns:
        found = re.search(pattern, lowered)
        if found:
            return found.group(0).strip()
    return None
def get_tracking_url(carrier, num):
    """Public tracking-page URL for *carrier*/*num*, or None if unknown."""
    templates = {
        'ups': 'https://www.ups.com/track?tracknum={}',
        'fedex': 'https://www.fedex.com/fedextrack/?trknbr={}',
        'usps': 'https://tools.usps.com/go/TrackConfirmAction?tLabels={}',
        'dhl': 'https://www.dhl.com/en/express/tracking.html?AWB={}',
        'amazon': 'https://track.amazon.com/tracking/{}'
    }
    template = templates.get(carrier)
    return template.format(num) if template else None
def process_message(sock, msg_num, conn):
    """Fetch IMAP message *msg_num*, detect a shipment email, and store it.

    Returns a dict describing the newly recorded shipment, or None when the
    message has no payload, cannot be parsed, was already recorded, or
    carries no recognizable tracking number.
    """
    sock.send(f'A4 FETCH {msg_num} RFC822\r\n'.encode())
    resp = get_response(sock, "A4 OK")
    email_data = extract_rfc822(resp)
    if not email_data:
        return None
    try:
        msg = BytesParser(policy=policy.default).parsebytes(email_data)
    except Exception:
        # Malformed message. (Was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.)
        return None
    message_id = msg.get('Message-ID', '').strip('<>')
    if not message_id:
        return None
    # Dedupe: skip anything already recorded for this Message-ID.
    cursor = conn.execute('SELECT 1 FROM shipments WHERE message_id = ?', (message_id,))
    if cursor.fetchone():
        return None
    sender_email = msg.get('From', '')
    # "Display Name <addr>" -> keep just the display name when present.
    sender_name = sender_email.split('<')[0].strip() if '<' in sender_email else sender_email
    subject = msg.get('Subject', '(No Subject)')
    body = parse_email(msg)
    carrier_id = detect_carrier(sender_email, body, subject)
    # extract_tracking may refine carrier_id when it came back None/unknown.
    tracking_num, carrier_id = extract_tracking(body, carrier_id)
    if not tracking_num:
        return None
    est_delivery = extract_delivery(body)
    tracking_url = get_tracking_url(carrier_id, tracking_num)
    carrier_name = CARRIERS.get(carrier_id, {}).get('name', carrier_id.upper())
    conn.execute('''INSERT INTO shipments (message_id, sender, carrier, tracking_number,
                    estimated_delivery, tracking_url, received_at, notified_at)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)''',
                 (message_id, sender_name, carrier_name, tracking_num, est_delivery,
                  tracking_url, datetime.now().isoformat(), datetime.now().isoformat()))
    conn.commit()
    return {
        'sender': sender_name, 'carrier': carrier_name, 'tracking': tracking_num,
        'estimated': est_delivery or 'Unknown', 'url': tracking_url
    }
def format_notification(s):
    """Render a shipment dict as a Discord-ready markdown message."""
    message = (
        "📦 **New Shipment Detected**\n"
        "\n"
        f"**From:** {s['sender']}\n"
        f"**Carrier:** {s['carrier']}\n"
        f"**Tracking:** `{s['tracking']}`\n"
        f"**Est. Delivery:** {s['estimated']}"
    )
    if s['url']:
        message += f"\n**Track:** <{s['url']}>"
    return message
def main():
    """Poll the IMAP inbox for unseen mail, record shipment emails, and
    print Discord-formatted notifications to stdout.

    Returns the list of newly detected shipment dicts (empty when none).
    """
    init_db()
    conn = sqlite3.connect(DB_PATH)
    shipments = []
    try:
        # Hand-rolled IMAP session over a raw TCP socket (no imaplib):
        # connect, LOGIN, SELECT INBOX, SEARCH UNSEEN, then FETCH per message.
        # NOTE(review): plain (non-TLS) socket -- credentials travel
        # unencrypted unless the server is local/trusted; confirm.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(15)
        sock.connect((IMAP_HOST, IMAP_PORT))
        sock.recv(1024)  # consume the server greeting line
        # NOTE(review): quoting breaks if USERNAME/PASSWORD contain '"' -- confirm inputs.
        sock.send(f'A1 LOGIN "{USERNAME}" "{PASSWORD}"\r\n'.encode())
        get_response(sock, "A1 OK")
        sock.send(b'A2 SELECT "INBOX"\r\n')
        get_response(sock, "A2 OK")
        sock.send(b'A3 SEARCH UNSEEN\r\n')
        resp = get_response(sock, "A3 OK").decode()
        msg_nums = []
        # Parse the untagged '* SEARCH n1 n2 ...' line into sequence numbers.
        for line in resp.split('\r\n'):
            if 'SEARCH' in line and '*' in line:
                parts = line.split('SEARCH')
                if len(parts) > 1:
                    msg_nums = [n for n in parts[1].strip().split() if n.isdigit()]
        for msg_num in msg_nums:
            s = process_message(sock, msg_num, conn)
            if s: shipments.append(s)
        sock.send(b'A5 LOGOUT\r\n')
        sock.close()
    except Exception as e:
        # Best-effort poller: report and fall through so results still print.
        print(f"Error: {e}")
    finally:
        conn.close()
    # Output for Discord
    if shipments:
        for s in shipments:
            print(format_notification(s))
            print("---")  # separator between notifications
    else:
        print("No new shipments found.")
    return shipments
if __name__ == "__main__":  # script entry point: run one inbox scan
    main()

152
tools/youtube-summarizer.py Normal file
View File

@@ -0,0 +1,152 @@
#!/usr/bin/env python3
"""
YouTube Video Summarizer
Extracts transcripts and generates bullet summaries with timestamps
"""
import sys
import re
import json
import subprocess
import tempfile
import os
from urllib.parse import urlparse, parse_qs
def extract_video_id(url):
    """Extract YouTube video ID from various URL formats.

    Handles watch?v=, youtu.be short links, embeds, and shorts.
    Returns the 11-character ID or None.
    """
    patterns = (
        r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})',
        r'youtube\.com/watch\?.*v=([a-zA-Z0-9_-]{11})',
        r'youtube\.com/shorts/([a-zA-Z0-9_-]{11})',
    )
    # First pattern that hits wins; None when nothing matches.
    hits = (re.search(p, url) for p in patterns)
    return next((m.group(1) for m in hits if m), None)
def get_transcript_yt_dlp(video_id):
    """Download auto-generated English subtitles for *video_id* via yt-dlp.

    Returns the raw subtitle text on success, None when yt-dlp produced no
    subtitle file, or an "Error: ..." string when the subprocess itself
    failed (callers distinguish these three cases).
    """
    try:
        # Use system temp directory (cross-platform)
        temp_dir = tempfile.gettempdir()
        base_path = os.path.join(temp_dir, f'yt_{video_id}')
        subprocess.run(
            ['yt-dlp', '--write-auto-sub', '--skip-download',
             '--sub-langs', 'en,en-en', '--convert-subs', 'srt',
             '-o', base_path, f'https://www.youtube.com/watch?v={video_id}'],
            capture_output=True,
            text=True,
            timeout=60
        )
        # yt-dlp names the output after the language it picked; probe the
        # possible variants rather than guessing which one it wrote.
        candidates = (
            f'{base_path}.en.srt',
            f'{base_path}.en.vtt',
            f'{base_path}.en-en.srt',
            f'{base_path}.en-en.vtt'
        )
        for sub_file in candidates:
            try:
                with open(sub_file, 'r', encoding='utf-8') as f:
                    content = f.read()
            except FileNotFoundError:
                continue
            try:
                # Previously the downloaded file was left behind in the temp
                # dir on every run; clean it up once read.
                os.remove(sub_file)
            except OSError:
                pass
            return content
        return None
    except Exception as e:
        return f"Error: {e}"
def parse_srt(srt_content):
    """Parse SRT content into a list of {'time', 'text'} dicts.

    'time' is the cue's start timestamp with the SRT comma replaced by a
    dot; malformed cues (fewer than 3 lines) are skipped. Empty or None
    input yields [].
    """
    if not srt_content:
        return []
    parsed = []
    for block in srt_content.strip().split('\n\n'):
        rows = block.split('\n')
        if len(rows) < 3:
            continue
        # rows[0] is the cue index, rows[1] the timing line, the rest is text.
        start = rows[1].split(' --> ')[0].replace(',', '.')
        parsed.append({'time': start, 'text': ' '.join(rows[2:])})
    return parsed
def chunk_and_summarize(entries, chunk_size=2000):
    """Build a markdown summary of parsed transcript entries.

    Args:
        entries: list of {'time': str, 'text': str} dicts (as from parse_srt).
        chunk_size: maximum characters of transcript text in the
            "Full Context" section. (This parameter existed but was ignored;
            the cut-off was hard-coded to 2000 -- now wired up, with the same
            default so existing callers see identical output.)

    Returns the summary as one markdown string, or a short message when
    there is no transcript.
    """
    if not entries:
        return "No transcript available for this video."
    full_text = ' '.join(e['text'] for e in entries)
    # Sample roughly every 10th entry so long videos still yield ~10 key points.
    sampled = entries[::max(1, len(entries) // 10)]
    result = []
    result.append("## Transcript Summary")
    result.append("")
    result.append(f"**Total entries:** {len(entries)}")
    # entries[-1]['time'] is 'HH:MM:SS.mmm'; [:5] keeps HH:MM as a rough duration.
    result.append(f"**Duration:** ~{entries[-1]['time'][:5] if entries else 'Unknown'}")
    result.append("")
    result.append("### Key Points with Timestamps")
    result.append("")
    for entry in sampled[:15]:  # cap at 15 sampled points
        time = entry['time'][:5]  # HH:MM prefix of the start timestamp
        text = entry['text'][:100]  # first 100 chars of the caption text
        result.append(f"- **{time}** -- {text}...")
    result.append("")
    result.append(f"### Full Context (first {chunk_size} chars)")
    result.append(full_text[:chunk_size] + "..." if len(full_text) > chunk_size else full_text)
    return "\n".join(result)
def main():
    """CLI entry point: fetch a YouTube video's transcript and print a summary.

    Exits with status 1 on a bad/missing URL; otherwise prints either the
    summary or a human-readable failure explanation.
    """
    # Force UTF-8 output so emoji/transcript characters don't crash on
    # Windows consoles. (The redundant function-local `import sys` was
    # removed -- sys is already imported at module level.)
    sys.stdout.reconfigure(encoding='utf-8')
    if len(sys.argv) < 2:
        print("Usage: youtube-summarizer.py <youtube_url>")
        sys.exit(1)
    url = sys.argv[1]
    video_id = extract_video_id(url)
    if not video_id:
        print(f"ERROR: Could not extract video ID from: {url}")
        print("Supported formats: youtube.com/watch?v=ID, youtu.be/ID, youtube.com/shorts/ID")
        sys.exit(1)
    print(f"Processing video: {video_id}")
    srt_content = get_transcript_yt_dlp(video_id)
    # get_transcript_yt_dlp returns content, None (no subs), or "Error: ...".
    if srt_content and not srt_content.startswith("Error"):
        entries = parse_srt(srt_content)
        summary = chunk_and_summarize(entries)
        print(summary)
    elif srt_content is None:
        print(f"No transcript available for video: {video_id}")
        print("This video may not have auto-generated captions, or they may be restricted.")
        print("Try a different video with visible captions enabled.")
    else:
        print(f"ERROR: Failed to download transcript")
        print(f"Details: {srt_content}")
if __name__ == '__main__':  # CLI entry point
    main()