Fresh start - excluded large ROM JSON files
This commit is contained in:
BIN
tools/__pycache__/memory_embedding_worker.cpython-313.pyc
Normal file
BIN
tools/__pycache__/memory_embedding_worker.cpython-313.pyc
Normal file
Binary file not shown.
102
tools/bulk_memory_loader.py
Normal file
102
tools/bulk_memory_loader.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Bulk Memory Loader
|
||||
================
|
||||
Loads all historical memory files into vector database.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import glob
|
||||
from datetime import datetime
|
||||
from memory_vector import setup_memory_vectors, store_memory, MemoryVectorDB
|
||||
from tools.memory_embedding_worker import process_memory_file
|
||||
|
||||
def get_all_memory_files():
    """Collect every memory file for bulk loading.

    Returns a list of (filepath, source_type) tuples covering daily notes
    (2025/2026 prefixes only), MEMORY.md, and per-project notes, sorted by
    path descending so the newest date-named dailies come first.
    """
    workspace = os.path.expanduser("~/.openclaw/workspace")
    collected = []

    # Daily notes: memory/202*.md, restricted to 2025-/2026- prefixes.
    for path in glob.glob(os.path.join(workspace, "memory", "202*.md")):
        if os.path.basename(path).startswith(('2025-', '2026-')):
            collected.append((path, "daily"))

    # Top-level MEMORY.md, when present.
    memory_md = os.path.join(workspace, "MEMORY.md")
    if os.path.exists(memory_md):
        collected.append((memory_md, "memory_md"))

    # Per-project notes under memory/projects/.
    for path in glob.glob(os.path.join(workspace, "memory", "projects", "*.md")):
        collected.append((path, "project"))

    # Path-descending sort puts newest date-named dailies first.
    collected.sort(key=lambda item: item[0], reverse=True)
    return collected
|
||||
|
||||
|
||||
def bulk_load():
    """Embed every historical memory file into the vector database.

    Prints progress as it goes; individual file failures are collected and
    reported at the end rather than aborting the run. Returns a summary
    dict with 'files', 'entries', and 'failed' counts.
    """
    banner = "=" * 60
    print(banner)
    print("BULK MEMORY LOADER")
    print(banner)
    print(f"Started: {datetime.now().strftime('%H:%M:%S')}")
    print()

    # Ensure the vector DB schema exists before writing anything.
    setup_memory_vectors()
    print("[OK] Database ready\n")

    files = get_all_memory_files()
    print(f"Found {len(files)} files to process:\n")

    # Preview only the first ten entries to keep output short.
    for path, kind in files[:10]:
        print(f" - {os.path.basename(path)} ({kind})")
    if len(files) > 10:
        print(f" ... and {len(files) - 10} more\n")

    total_entries = 0
    failed_files = []

    for index, (filepath, source_type) in enumerate(files, 1):
        print(f"\n[{index}/{len(files)}] {os.path.basename(filepath)}")
        try:
            created = process_memory_file(filepath, source_type)
        except Exception as e:  # keep going; summarize failures below
            print(f" FAILED: {e}")
            failed_files.append(filepath)
        else:
            total_entries += created
            print(f" Created {created} entries")

    # Final summary block.
    print("\n" + banner)
    print("BULK LOAD COMPLETE")
    print(banner)
    print(f"Files processed: {len(files)}")
    print(f"Total embeddings: {total_entries}")
    if failed_files:
        print(f"Failed files: {len(failed_files)}")
        for path in failed_files:
            print(f" - {path}")
    print(f"Finished: {datetime.now().strftime('%H:%M:%S')}")
    print(banner)

    return {
        "files": len(files),
        "entries": total_entries,
        "failed": len(failed_files),
    }


if __name__ == "__main__":
    bulk_load()
|
||||
6
tools/check_cron.py
Normal file
6
tools/check_cron.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""List all tables in the OpenClaw cron database (quick diagnostic)."""
import sqlite3

DB_PATH = r'C:\Users\admin\.openclaw\cron.db'

conn = sqlite3.connect(DB_PATH)
try:
    c = conn.cursor()
    # FIX: the previous query read WHERE type=''table'' -- the doubled
    # quotes made SQLite compare against the literal string 'table'
    # (including quote characters), so no rows ever matched.
    c.execute("SELECT name FROM sqlite_master WHERE type='table'")
    for r in c.fetchall():
        print(r)
finally:
    # FIX: the connection was never closed.
    conn.close()
|
||||
249
tools/memory_embedding_worker.py
Normal file
249
tools/memory_embedding_worker.py
Normal file
@@ -0,0 +1,249 @@
|
||||
"""
|
||||
Memory Embedding Worker
|
||||
=======================
|
||||
Process memory files and store with embeddings in SQLite.
|
||||
Runs as cron job or standalone.
|
||||
|
||||
Usage:
|
||||
python memory_embedding_worker.py [--date YYYY-MM-DD]
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
# Add parent dir to path for memory_vector
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import json
|
||||
import re
|
||||
import requests
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Tuple, Optional
|
||||
from memory_vector import setup_memory_vectors, store_memory, MemoryVectorDB
|
||||
|
||||
# Ollama configuration
|
||||
OLLAMA_URL = "http://localhost:11434"
|
||||
EMBED_MODEL = "nomic-embed-text"
|
||||
CHUNK_SIZE = 512 # Characters per chunk
|
||||
|
||||
|
||||
def generate_embedding(text: str) -> Optional[List[float]]:
    """Embed *text* with the local Ollama server.

    Truncates the input to 8192 characters, POSTs it to /api/embeddings,
    and returns the embedding vector. Returns None on any failure
    (connection error, HTTP error, malformed response).
    """
    request_body = {
        "model": EMBED_MODEL,
        "prompt": text[:8192],  # keep the prompt within a safe size
    }
    try:
        resp = requests.post(
            f"{OLLAMA_URL}/api/embeddings",
            json=request_body,
            timeout=60,
        )
        resp.raise_for_status()
        return resp.json()["embedding"]
    except Exception as e:
        print(f"[ERROR] Failed to generate embedding: {e}")
        return None
|
||||
|
||||
|
||||
def chunk_text(text: str, max_chars: int = CHUNK_SIZE) -> List[str]:
    """Greedily pack paragraph-separated text into chunks of ~max_chars.

    Paragraphs (split on blank lines) accumulate into a buffer until
    adding the next one would exceed *max_chars*; each full buffer becomes
    one chunk. A single oversize paragraph still becomes its own chunk.
    Always returns at least one (possibly truncated) chunk.
    """
    chunks: List[str] = []
    buffer = ""

    for para in text.split('\n\n'):
        # +2 accounts for the "\n\n" separator re-inserted between paragraphs.
        if len(buffer) + len(para) + 2 <= max_chars:
            buffer = f"{buffer}\n\n{para}" if buffer else para
        else:
            if buffer:
                chunks.append(buffer.strip())
            buffer = para

    if buffer:
        chunks.append(buffer.strip())

    return chunks or [text[:max_chars]]
|
||||
|
||||
|
||||
def extract_sections(content: str) -> List[Tuple[str, str]]:
    """Split markdown into (title, body) pairs on ##/### headers.

    Sections whose stripped body is 50 characters or shorter are dropped.
    If no usable section is found, the whole stripped content is returned
    as a single ("General", content) entry; empty content yields [].
    """
    # Each match: header text up to the newline, then everything until the
    # next ##/### header or end of string (DOTALL so bodies span lines).
    header_re = r'#{2,3}\s+(.+?)\n(.*?)(?=#{2,3}\s+|\Z)'
    found = []
    for raw_title, raw_body in re.findall(header_re, content, re.DOTALL):
        body = raw_body.strip()
        if body and len(body) > 50:
            found.append((raw_title.strip(), body))

    # Fallback: headerless content becomes one catch-all section.
    if not found and content.strip():
        found.append(("General", content.strip()))

    return found
|
||||
|
||||
|
||||
def process_memory_file(filepath: str, source_type: str) -> int:
    """Embed one memory file into the vector store.

    Reads the file, splits it into titled sections, chunks each section,
    embeds every chunk via Ollama, and stores the results. Returns the
    number of entries stored (0 if the file does not exist).

    Args:
        filepath: Path to the memory file.
        source_type: Category tag, e.g. 'daily', 'memory_md', 'project'.
    """
    if not os.path.exists(filepath):
        print(f"[SKIP] File not found: {filepath}")
        return 0

    with open(filepath, 'r', encoding='utf-8', errors='replace') as fh:
        content = fh.read()

    # The basename is stored as the source reference for each entry.
    source_path = os.path.basename(filepath)

    sections = extract_sections(content)
    stored = 0

    print(f"[PROCESS] {source_path}: {len(sections)} sections found")

    for section_title, section_content in sections:
        chunks = chunk_text(section_content)
        total = len(chunks)

        for index, chunk in enumerate(chunks, 1):
            embedding = generate_embedding(chunk)
            if not embedding:
                print(f" [FAILED] Chunk {index}/{total}")
                continue

            # Label multi-chunk sections so each entry stays traceable.
            if total > 1:
                label = f"{section_title} (chunk {index}/{total})"
            else:
                label = section_title
            store_memory(
                source_type=source_type,
                source_path=f"{source_path}#{label}",
                content=chunk[:500],  # store only a preview of the text
                embedding=embedding,
            )
            stored += 1
            print(f" [STORED] Chunk {index}/{total}")

    return stored
|
||||
|
||||
|
||||
def get_memory_files(date: Optional[str] = None) -> List[Tuple[str, str]]:
    """Pick the memory files for one sync run.

    With *date* (YYYY-MM-DD), returns just that day's note if it exists.
    Otherwise returns yesterday's note plus today's (when present).
    MEMORY.md is appended in either case, but only when it was modified
    within the last 24 hours. Returns (filepath, source_type) tuples.
    """
    workspace = os.path.expanduser("~/.openclaw/workspace")
    memory_dir = os.path.join(workspace, "memory")
    selected: List[Tuple[str, str]] = []

    if date:
        # Explicit date requested: only that daily note.
        target = os.path.join(memory_dir, f"{date}.md")
        if os.path.exists(target):
            selected.append((target, "daily"))
    else:
        # Default run: yesterday's note...
        yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
        daily_path = os.path.join(memory_dir, f"{yesterday}.md")
        if os.path.exists(daily_path):
            selected.append((daily_path, "daily"))

        # ...plus today's note, in case the sync runs mid-day.
        today = datetime.now().strftime("%Y-%m-%d")
        today_path = os.path.join(memory_dir, f"{today}.md")
        if os.path.exists(today_path) and today_path != daily_path:
            selected.append((today_path, "daily"))

    # MEMORY.md rides along only when it changed recently, so unchanged
    # content is not re-embedded on every run.
    memory_md = os.path.join(workspace, "MEMORY.md")
    if os.path.exists(memory_md):
        modified = datetime.fromtimestamp(os.path.getmtime(memory_md))
        if datetime.now() - modified < timedelta(hours=24):
            selected.append((memory_md, "memory_md"))

    return selected
|
||||
|
||||
|
||||
def run_daily_sync(date: Optional[str] = None, dry_run: bool = False) -> dict:
    """Run one memory-embedding sync pass.

    Args:
        date: Specific date to process (YYYY-MM-DD); None means yesterday
            (plus today's note when present).
        dry_run: When True, skips database setup.
            NOTE(review): files are still processed and stored even in
            dry-run mode -- process_memory_file has no dry-run switch;
            confirm whether that is intended.

    Returns:
        Stats dict with 'files' and 'entries' counts.
    """
    divider = "=" * 50
    print(divider)
    print("Memory Embedding Worker")
    print(divider)

    if dry_run:
        print("[DRY RUN] No database changes\n")
    else:
        setup_memory_vectors()
        print("[OK] Database ready\n")

    files = get_memory_files(date)
    if not files:
        print("[INFO] No memory files to process")
        return {"files": 0, "entries": 0}

    print(f"[INFO] Processing {len(files)} file(s):\n")
    for f, t in files:
        print(f" - {f} ({t})")
    print()

    stats = {"files": len(files), "entries": 0, "failed": 0}
    total = 0

    for filepath, source_type in files:
        print(f"\n[FILE] {os.path.basename(filepath)}")
        created = process_memory_file(filepath, source_type)
        total += created
        print(f" Created {created} embedding entries")

    stats["entries"] = total

    # Summary block.
    print("\n" + divider)
    print("SUMMARY")
    print(divider)
    print(f"Files processed: {stats['files']}")
    print(f"Embedding entries: {stats['entries']}")

    return stats
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse args, run the sync, return an exit code."""
    import argparse

    parser = argparse.ArgumentParser(description='Memory Embedding Worker')
    parser.add_argument('--date', help='Process specific date (YYYY-MM-DD)')
    parser.add_argument('--dry-run', action='store_true', help='Test without storing')
    options = parser.parse_args()

    stats = run_daily_sync(date=options.date, dry_run=options.dry_run)

    # Exit 0 only when at least one embedding entry was created.
    return 0 if stats["entries"] > 0 else 1


if __name__ == "__main__":
    sys.exit(main())
|
||||
83
tools/proton_imap_simple.py
Normal file
83
tools/proton_imap_simple.py
Normal file
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Simple Proton Mail Bridge IMAP test - clean version"""
|
||||
|
||||
import socket
|
||||
|
||||
# Proton Mail Bridge exposes IMAP locally; these are its connection settings.
IMAP_HOST = "127.0.0.1"
IMAP_PORT = 1143
USERNAME = "alexthenerdyai@proton.me"
# SECURITY(review): plaintext credential committed in source -- move it to
# an environment variable or secrets store and rotate the password.
PASSWORD = "Is413#dfslw@alex"
|
||||
|
||||
def get_response(sock, tag):
    """Accumulate IMAP data from *sock* until *tag* appears.

    Also stops early if the peer closes the connection (empty recv).
    Returns the decoded text collected so far.
    """
    marker = tag.encode()
    buf = b""
    while True:
        data = sock.recv(4096)
        if not data:
            break
        buf += data
        if marker in buf:
            break
    return buf.decode()
|
||||
|
||||
def main():
    """Connect to the local Proton Bridge, log in, and report INBOX stats."""
    print("=" * 50)
    print("Proton Mail Bridge - IMAP Test")
    print("=" * 50)

    # Connect to the bridge's local IMAP port.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(10)
    sock.connect((IMAP_HOST, IMAP_PORT))

    # Server greeting (untagged "* OK" line).
    greeting = sock.recv(1024).decode()
    print(f"\n[CONNECT] {greeting.strip()[:80]}...")

    # Login. FIX: the response was previously ignored and "Success!" was
    # printed unconditionally; now a failed login aborts cleanly. We wait
    # for the "A1" tag (OK/NO/BAD all carry it) and then inspect it.
    sock.send(f'A1 LOGIN "{USERNAME}" "{PASSWORD}"\r\n'.encode())
    resp = get_response(sock, "A1")
    if "A1 OK" not in resp:
        print(f"[LOGIN] Failed: {resp.strip()[:120]}")
        sock.close()
        return
    print("[LOGIN] Success!")

    # Subscribed folders via LSUB.
    sock.send(b'A2 LSUB "" "*"\r\n')
    resp = get_response(sock, "A2 OK")
    print("\n[FOLDERS]")
    folders = []
    for line in resp.split('\r\n'):
        if 'LSUB' in line and '/' in line:
            folder = line.split('"/"')[-1].strip().strip('"')
            folders.append(folder)
            print(f" - {folder}")

    # Message count via read-only EXAMINE ("* n EXISTS" line).
    sock.send(b'A3 EXAMINE "INBOX"\r\n')
    resp = get_response(sock, "A3 OK")
    exists = 0
    for line in resp.split('\r\n'):
        if 'EXISTS' in line:
            exists = int(line.split()[1])
            break
    print(f"\n[INBOX] {exists} total messages")

    # Unread count via SEARCH UNSEEN ("* SEARCH id id ..." line).
    sock.send(b'A4 SEARCH UNSEEN\r\n')
    resp = get_response(sock, "A4 OK")
    unseen_count = 0
    for line in resp.split('\r\n'):
        if 'SEARCH' in line and '*' in line:
            parts = line.split('SEARCH')
            if len(parts) > 1:
                ids = parts[1].strip().split()
                unseen_count = len(ids) if ids and ids[0] else 0
    print(f"[INBOX] {unseen_count} unread messages")

    # Logout and close.
    sock.send(b'A5 LOGOUT\r\n')
    print("\n[LOGOUT] Done!")
    sock.close()
|
||||
109
tools/proton_imap_test.py
Normal file
109
tools/proton_imap_test.py
Normal file
@@ -0,0 +1,109 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Quick test of Proton Mail Bridge IMAP connectivity"""
|
||||
|
||||
import socket
|
||||
import ssl
|
||||
import base64
|
||||
|
||||
# Proton Bridge settings
|
||||
IMAP_HOST = "127.0.0.1"
|
||||
IMAP_PORT = 1143
|
||||
USERNAME = "alexthenerdyai@proton.me"
|
||||
PASSWORD = "8yiNBTJBMc6HyOQjIZKjMw"
|
||||
|
||||
def send_command(sock, cmd):
    """Send one IMAP command line and read back a best-effort response.

    Reads until a CRLF shows up in a received chunk, the peer closes the
    connection, or a 5-second timeout fires. Returns the decoded text,
    which may be partial.
    """
    sock.send(f"{cmd}\r\n".encode())
    collected = b""
    while True:
        try:
            sock.settimeout(5)
            part = sock.recv(4096)
            if not part:
                break
            collected += part
            # A CRLF in this chunk means at least one full line arrived.
            if b"\r\n" in part:
                break
        except socket.timeout:
            break
    return collected.decode()
|
||||
|
||||
def main():
    """Smoke-test Proton Bridge IMAP: connect, login, list folders, check INBOX."""
    print("=" * 50)
    print("Proton Mail Bridge IMAP Test")
    print("=" * 50)

    # [1] Connect to the bridge's local IMAP port.
    print(f"\n[1] Connecting to {IMAP_HOST}:{IMAP_PORT}...")
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(10)
    try:
        sock.connect((IMAP_HOST, IMAP_PORT))
        greeting = sock.recv(1024).decode()
        print(f"[OK] Connected! Server: {greeting.strip()}")
    except Exception as e:
        print(f"[ERR] Connection failed: {e}")
        return

    # [2] Authenticate. FIX: collapsed the two duplicated "OK" branches
    # (identical bodies) into a single check.
    print("\n[2] Authenticating...")
    response = send_command(sock, f'A1 LOGIN "{USERNAME}" "{PASSWORD}"')
    if "OK" in response:
        print("[OK] Authentication successful!")
    else:
        print(f"[ERR] Authentication failed: {response.strip()}")
        sock.close()
        return

    # [3] List mailboxes.
    print("\n[3] Listing mailboxes...")

    response = send_command(sock, 'A2a LSUB "" "*"')
    print(f"LSUB response:\n{response}")

    response = send_command(sock, 'A2b LIST "" "*"')
    print(f"LIST response:\n{response}")

    folders = []
    for line in response.split("\r\n"):
        # FIX: this loop parses the LIST response, but previously matched
        # only lines containing 'LSUB', so no folders were ever collected.
        # Skip the tagged "A2b OK ... LIST completed" line explicitly.
        if 'LIST' in line and not line.startswith('A2b'):
            if '"/' in line:
                folder = line.split('"/"')[-1].strip().strip('"')
            else:
                parts = line.split()
                if not parts:
                    continue
                folder = parts[-1].strip().strip('"')
            folders.append(folder)
            print(f" [FOLDER] {folder}")

    # [4] Inspect INBOX (read-only).
    print("\n[4] Checking INBOX...")
    response = send_command(sock, 'A3 EXAMINE "INBOX"')
    print(f"EXAMINE response: {response[:200]}")

    # Unread count via SEARCH UNSEEN.
    response = send_command(sock, 'A4 SEARCH UNSEEN')
    print(f"SEARCH response: {response[:200]}")
    if 'SEARCH' in response:
        unseen = response.split('SEARCH')[1].strip().split()
        if unseen and unseen[0] != '':
            print(f"[OK] {len(unseen)} unread messages")
        else:
            print("[OK] No unread messages")
    else:
        print(" Could not check unread count")

    # [5] Logout and close.
    print("\n[5] Logging out...")
    send_command(sock, "A5 LOGOUT")
    print("[OK] Done!")

    sock.close()
|
||||
238
tools/proton_read_email.py
Normal file
238
tools/proton_read_email.py
Normal file
@@ -0,0 +1,238 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Read emails from Proton Mail INBOX - Full parsing"""
|
||||
|
||||
import socket
|
||||
import email
|
||||
from email import policy
|
||||
from email.parser import BytesParser
|
||||
from html.parser import HTMLParser
|
||||
|
||||
IMAP_HOST = "127.0.0.1"
|
||||
IMAP_PORT = 1143
|
||||
USERNAME = "alexthenerdyai@proton.me"
|
||||
PASSWORD = "8yiNBTJBMc6HyOQjIZKjMw"
|
||||
|
||||
def strip_html(html):
    """Convert an HTML fragment to readable plain text.

    Drops style/script blocks and all tags, maps block-level closers to
    newlines and table cells to tabs, collapses whitespace, and decodes
    the handful of HTML entities common in mail bodies.
    """
    import re

    # Remove style/script tags together with their contents.
    html = re.sub(r'<(style|script)[^>]*>[^<]*</\1>', ' ', html, flags=re.IGNORECASE | re.DOTALL)

    # Block-level closers end a line; table cells become tab stops.
    html = re.sub(r'</(p|div|h[1-6]|li|tr|br)>', '\n', html, flags=re.IGNORECASE)
    html = re.sub(r'</(td|th)>', '\t', html, flags=re.IGNORECASE)

    # Strip every remaining tag, then any leftover CSS @media blocks.
    html = re.sub(r'<[^>]+>', ' ', html)
    html = re.sub(r'@media[^{]*\{[^}]*\}', ' ', html)

    # Collapse runs of spaces/tabs and of blank lines.
    html = re.sub(r'[\t ]+', ' ', html)
    html = re.sub(r'\n[\n ]+', '\n\n', html)

    # Decode common HTML entities.
    # FIX: these replacements had become no-ops (e.g. replace('<', '<')) --
    # the entity names were lost; restored &nbsp;/&lt;/&gt;/&amp;/&quot;.
    html = html.replace('&nbsp;', ' ')
    html = html.replace('&lt;', '<')
    html = html.replace('&gt;', '>')
    html = html.replace('&amp;', '&')
    html = html.replace('&quot;', '"')
    # Normalize curly quotes and ellipsis to ASCII.
    html = html.replace('’', "'")
    html = html.replace('‘', "'")
    html = html.replace('“', '"')
    html = html.replace('”', '"')
    html = html.replace('…', '...')

    return html.strip()
|
||||
|
||||
def get_response(sock, tag):
    """Read from *sock* until *tag* appears in the accumulated bytes.

    Returns the raw bytes (unlike the decoding variant in
    proton_imap_simple.py); also stops if the peer closes the connection.
    """
    marker = tag.encode()
    buf = b""
    while True:
        data = sock.recv(16384)
        if not data:
            break
        buf += data
        if marker in buf:
            break
    return buf
|
||||
|
||||
def extract_rfc822(data):
    """Pull the raw RFC822 message bytes out of an IMAP FETCH response.

    Collects the lines between the '* n FETCH (RFC822 ...' header line and
    the tagged 'A4/A5/A6 ... OK' completion line. Returns b'' when the
    input contains no CRLF at all.
    """
    if data.find(b'\r\n') == -1:
        return b''

    message_lines = []
    in_message = False
    for line in data.split(b'\r\n'):
        # The FETCH response line precedes the literal payload.
        if b'FETCH' in line and b'RFC822' in line:
            in_message = True
            continue
        # The tagged OK line terminates the payload.
        if line.startswith((b'A4 ', b'A5 ', b'A6 ')) and b'OK' in line:
            break
        if in_message:
            message_lines.append(line)

    return b'\r\n'.join(message_lines)
|
||||
|
||||
def parse_email_body(msg):
    """Extract readable body parts from an email.message.Message.

    Returns a list of (kind, text) tuples, where kind is "text" for
    text/plain parts and "html" for text/html parts already converted to
    plain text via strip_html. Attachments are skipped. Non-multipart
    messages yield at most one ("text", ...) entry.
    """
    body_parts = []

    if msg.is_multipart():
        for part in msg.walk():
            content_type = part.get_content_type()
            content_disposition = str(part.get("Content-Disposition", ""))

            # Skip attachments.
            if "attachment" in content_disposition:
                continue

            payload = part.get_payload(decode=True)
            if not payload:
                continue

            charset = part.get_content_charset() or 'utf-8'
            try:
                text = payload.decode(charset, errors='replace')
            except LookupError:
                # FIX: was a bare 'except:'. With errors='replace' only an
                # unknown charset name can raise here (LookupError).
                text = payload.decode('utf-8', errors='replace')

            if content_type == "text/plain":
                body_parts.append(("text", text))
            elif content_type == "text/html":
                body_parts.append(("html", strip_html(text)))
    else:
        payload = msg.get_payload(decode=True)
        if payload:
            charset = msg.get_content_charset() or 'utf-8'
            try:
                text = payload.decode(charset, errors='replace')
            except LookupError:
                # FIX: was a bare 'except:' (see above).
                text = payload.decode('utf-8', errors='replace')

            if msg.get_content_type() == "text/html":
                text = strip_html(text)

            body_parts.append(("text", text))

    return body_parts
|
||||
|
||||
def format_email(msg):
    """Render an email message as a readable text report.

    Includes FROM/TO/DATE/SUBJECT headers (with fallbacks for missing
    fields) and up to 2000 characters of the best available body --
    plain text preferred over converted HTML.
    """
    rule = "=" * 60
    lines = [rule]

    # Header block.
    lines.append(f"FROM: {msg['From'] or '(Unknown)'}")
    lines.append(f"TO: {msg['To'] or '(Unknown)'}")
    lines.append(f"DATE: {msg['Date'] or '(No Date)'}")
    lines.append(f"SUBJECT: {msg['Subject'] or '(No Subject)'}")
    lines.append(rule)

    body_parts = parse_email_body(msg)
    if body_parts:
        # Prefer a plain-text part; otherwise fall back to the first part.
        chosen = next((text for kind, text in body_parts if kind == "text"), None)
        if not chosen:
            chosen = body_parts[0][1]

        if chosen:
            lines.append("\nBODY:")
            lines.append("-" * 60)
            # Strip per-line whitespace, then drop blank lines entirely.
            cleaned = '\n'.join(s.strip() for s in chosen.split('\n'))
            cleaned = '\n'.join(filter(None, cleaned.split('\n')))
            lines.append(cleaned[:2000])  # cap output length
            if len(cleaned) > 2000:
                lines.append("\n[... message truncated ...]")
    else:
        lines.append("\n[No readable body content]")

    return '\n'.join(lines)
|
||||
|
||||
def read_message(sock, msg_num):
    """Fetch message *msg_num* via IMAP and print a formatted view."""
    sock.send(f'A4 FETCH {msg_num} RFC822\r\n'.encode())
    reply = get_response(sock, "A4 OK")

    raw = extract_rfc822(reply)
    if not raw:
        print(f"[ERR] Could not extract message {msg_num}")
        return

    try:
        # Parse with the modern policy so headers decode sensibly.
        parsed = BytesParser(policy=policy.default).parsebytes(raw)
        print(format_email(parsed))
    except Exception as e:
        print(f"[ERR] Failed to parse: {e}")
        print("RAW:", raw[:500])
|
||||
|
||||
def main():
    """Connect to the bridge, log in, and print the oldest INBOX message."""
    print("Proton Mail - Email Reader")
    print("=" * 60)

    # Connect and discard the server greeting line.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(15)
    sock.connect((IMAP_HOST, IMAP_PORT))
    sock.recv(1024)

    # Login.
    sock.send(f'A1 LOGIN "{USERNAME}" "{PASSWORD}"\r\n'.encode())
    get_response(sock, "A1 OK")
    print("[LOGIN] Success\n")

    # Open INBOX.
    sock.send(b'A2 SELECT "INBOX"\r\n')
    get_response(sock, "A2 OK")

    # Collect all message sequence numbers from "* SEARCH n n ...".
    sock.send(b'A3 SEARCH ALL\r\n')
    reply = get_response(sock, "A3 OK").decode()

    msg_nums = []
    for line in reply.split('\r\n'):
        if 'SEARCH' in line and '*' in line:
            tail = line.split('SEARCH')
            if len(tail) > 1:
                msg_nums = [n for n in tail[1].strip().split() if n.isdigit()]

    print(f"[INFO] {len(msg_nums)} messages in INBOX\n")

    # Show the oldest (lowest-numbered) message, if any.
    if msg_nums:
        read_message(sock, msg_nums[0])

    # Logout and close.
    sock.send(b'A5 LOGOUT\r\n')
    sock.close()
    print("\n" + "=" * 60)
    print("[DONE]")
|
||||
81
tools/reminder-bot.py
Normal file
81
tools/reminder-bot.py
Normal file
@@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Discord Reminder Bot - Called by OpenClaw cron to deliver reminders
|
||||
Uses the message tool to actually post to Discord
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
DB_PATH = os.path.expanduser("~/.openclaw/workspace/data/reminders.db")
|
||||
|
||||
|
||||
def get_due_reminders():
    """Return all active reminders whose remind_at is now or earlier.

    Each row is an (id, user_id, channel_id, message, remind_at) tuple.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.execute(
            '''SELECT id, user_id, channel_id, message, remind_at
               FROM reminders
               WHERE active = 1 AND remind_at <= datetime('now')'''
        )
        rows = cursor.fetchall()
    finally:
        conn.close()
    return rows
|
||||
|
||||
|
||||
def mark_reminder_sent(reminder_id):
    """Deactivate a reminder once it has been delivered."""
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute('UPDATE reminders SET active = 0 WHERE id = ?', (reminder_id,))
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def format_reminder_message(message, user_id):
    """Build the Discord reminder line, mentioning the target user."""
    mention = f"<@{user_id}>"
    return f"🔔 **Reminder for {mention}:** {message}"
|
||||
|
||||
|
||||
def _reminder_payload(row):
    """Shape a reminder DB row into the JSON payload the agent consumes.

    Row layout: (id, user_id, channel_id, message, remind_at).
    """
    return {
        "action": "send_reminder",
        "reminder_id": row[0],
        "user_id": row[1],
        "channel_id": row[2],
        "message": row[3],
        "formatted": format_reminder_message(row[3], row[1]),
    }


if __name__ == "__main__":
    # FIX: the payload dict was built twice with identical keys; the
    # duplication is now factored into _reminder_payload above.
    if len(sys.argv) > 1:
        # Specific reminder requested (cron passes its id).
        reminder_id = int(sys.argv[1])
        conn = sqlite3.connect(DB_PATH)
        c = conn.cursor()
        c.execute('''SELECT id, user_id, channel_id, message, remind_at
                     FROM reminders WHERE id = ? AND active = 1''', (reminder_id,))
        result = c.fetchone()
        conn.close()

        if result:
            # Output JSON for the agent to use with the message tool.
            print(json.dumps(_reminder_payload(result)))
        else:
            print(json.dumps({"error": "Reminder not found or already sent"}))
    else:
        # No id given: emit every currently-due reminder.
        due = get_due_reminders()
        if due:
            for r in due:
                print(json.dumps(_reminder_payload(r)))
        else:
            print(json.dumps({"status": "no_due_reminders"}))
|
||||
103
tools/reminder-handler.ps1
Normal file
103
tools/reminder-handler.ps1
Normal file
@@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env pwsh
<#
.SYNOPSIS
Discord Reminder Handler - Called by OpenClaw to process reminder commands
.DESCRIPTION
Handles: add, list, delete, cleanup
.EXAMPLE
.\reminder-handler.ps1 add "458667380332036117" "1474636036905631867" "Call mom" "2h"
.\reminder-handler.ps1 list
.\reminder-handler.ps1 delete 5
#>

# First positional argument selects the subcommand; the rest are its args.
$action = $args[0]
# Resolve reminder-manager.py relative to this script's own directory.
$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
$pythonScript = Join-Path $scriptDir "reminder-manager.py"

# Fetch all reminders from the Python manager, parsed from its JSON output.
function Get-RemindersJson {
    $json = & python $pythonScript "list" | Out-String
    return $json | ConvertFrom-Json
}

switch ($action) {
    "add" {
        # add <user_id> <channel_id> <message> <time>
        $userId = $args[1]
        $channelId = $args[2]
        $message = $args[3]
        $time = $args[4]

        if (-not $userId -or -not $message -or -not $time) {
            Write-Error "Usage: add <user_id> <channel_id> <message> <time>"
            exit 1
        }

        # Delegate persistence to the Python manager; it replies with JSON.
        $result = & python $pythonScript "add" $userId $channelId $message $time | Out-String
        $reminder = $result | ConvertFrom-Json

        if ($reminder.error) {
            Write-Error $reminder.error
            exit 1
        }

        Write-Output "@{reminder_id:$($reminder.id)}"
    }

    "list" {
        # list [user_id] -- optionally filter to one user's reminders.
        $userId = $args[1]
        $reminders = Get-RemindersJson

        if ($userId) {
            $reminders = $reminders | Where-Object { $_.user_id -eq $userId }
        }

        if ($reminders.Count -eq 0) {
            Write-Output "No active reminders."
        } else {
            $reminders | ForEach-Object {
                # NOTE(review): the custom format string contains '\at' --
                # likely intended as the literal 'at'; confirm the rendered
                # output before relying on it.
                $when = [datetime]::Parse($_.remind_at).ToString("MMM d '\at' h:mm tt")
                Write-Output "`#$($_.id): $($_.message) - $when"
            }
        }
    }

    "delete" {
        # delete <reminder_id> -- cancel one reminder by id.
        $id = $args[1]
        if (-not $id) {
            Write-Error "Usage: delete <reminder_id>"
            exit 1
        }

        $result = & python $pythonScript "delete" $id | Out-String
        $data = $result | ConvertFrom-Json

        if ($data.deleted) {
            Write-Output "Reminder#$id cancelled."
        } else {
            Write-Error "Reminder not found."
        }
    }

    "cleanup" {
        # Remove stale rows via the Python manager.
        & python $pythonScript "cleanup"
    }

    default {
        # No/unknown command: print usage help.
        Write-Output @"
Discord Reminder Handler

Usage: reminder-handler.ps1 <command> [args]

Commands:
  add <user_id> <channel_id> <message> <time>  Add a reminder
  list [user_id]  List reminders
  delete <id>  Cancel a reminder
  cleanup  Remove old entries

Time formats:
  20m, 2h, 1h30m - Relative time
  9am, 2:30pm - Today at time
  tomorrow 9am - Tomorrow at time
"@
    }
}
|
||||
270
tools/reminder-manager.py
Normal file
270
tools/reminder-manager.py
Normal file
@@ -0,0 +1,270 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Reminder Manager for OpenClaw Discord
|
||||
Handles one-shot and recurring reminders via OpenClaw cron + SQLite
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
DB_PATH = os.path.expanduser("~/.openclaw/workspace/data/reminders.db")
|
||||
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
|
||||
|
||||
|
||||
def init_db():
    """Create the reminders table if it does not already exist (idempotent)."""
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    cur.execute('''CREATE TABLE IF NOT EXISTS reminders (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id TEXT NOT NULL,
                    channel_id TEXT NOT NULL,
                    message TEXT NOT NULL,
                    remind_at TEXT NOT NULL,
                    created_at TEXT DEFAULT CURRENT_TIMESTAMP,
                    is_recurring INTEGER DEFAULT 0,
                    recurrence_rule TEXT,
                    cron_job_id TEXT,
                    active INTEGER DEFAULT 1
                )''')
    conn.commit()
    conn.close()
|
||||
|
||||
|
||||
def parse_time(time_str: str) -> datetime:
    """Parse a human-friendly time string into a datetime.

    Supported formats:
      - Relative offsets: "20m", "2h", "1h30m"
      - "tomorrow" (same time tomorrow)
      - "tomorrow 9am", "tomorrow 14:00"
      - Today at a time: "9am", "2:30pm", "14:00" — rolls to tomorrow
        when that time has already passed today.

    Raises:
        ValueError: if the string matches no known format.
    """
    now = datetime.now()
    time_str = time_str.lower().strip()

    # Relative offsets: Nm, Nh, NhMm
    match = re.match(r'^(\d+)m$', time_str)
    if match:
        return now + timedelta(minutes=int(match.group(1)))

    match = re.match(r'^(\d+)h$', time_str)
    if match:
        return now + timedelta(hours=int(match.group(1)))

    match = re.match(r'^(\d+)h(\d+)m$', time_str)
    if match:
        return now + timedelta(hours=int(match.group(1)), minutes=int(match.group(2)))

    # Bare "tomorrow": same wall-clock time, next day
    if time_str == 'tomorrow':
        return now + timedelta(days=1)

    # Tomorrow at time: "tomorrow 9am", "tomorrow 14:00"
    match = re.match(r'^tomorrow\s+([\d:]+)(am|pm)?$', time_str)
    if match:
        time_part, ampm = match.group(1), match.group(2)
        if ':' in time_part:
            hour, minute = map(int, time_part.split(':'))
        else:
            hour, minute = int(time_part), 0
        hour = _to_24h(hour, ampm)
        tomorrow = now + timedelta(days=1)
        return tomorrow.replace(hour=hour, minute=minute, second=0, microsecond=0)

    # Today at "H:MM[am|pm]"
    match = re.match(r'^(\d{1,2}):(\d{2})(am|pm)?$', time_str)
    if match:
        hour = _to_24h(int(match.group(1)), match.group(3))
        result = now.replace(hour=hour, minute=int(match.group(2)),
                             second=0, microsecond=0)
        # Already past today -> schedule for tomorrow
        return result if result >= now else result + timedelta(days=1)

    # Today at "H(am|pm)"
    match = re.match(r'^(\d{1,2})(am|pm)$', time_str)
    if match:
        hour = _to_24h(int(match.group(1)), match.group(2))
        result = now.replace(hour=hour, minute=0, second=0, microsecond=0)
        return result if result >= now else result + timedelta(days=1)

    raise ValueError(f"Can't parse time: {time_str}")


def _to_24h(hour: int, ampm: str) -> int:
    """Convert a 12-hour clock value to 24-hour; ampm of None means already 24h."""
    if ampm == 'pm' and hour != 12:
        return hour + 12
    if ampm == 'am' and hour == 12:
        return 0
    return hour
|
||||
|
||||
|
||||
def add_reminder(user_id: str, channel_id: str, message: str, time_str: str) -> dict:
    """Parse *time_str*, persist a new reminder row, and return its details.

    Returns {"error": ...} when the parsed time is already in the past.
    The caller is responsible for scheduling the actual cron job.
    """
    init_db()

    remind_at = parse_time(time_str)
    if remind_at < datetime.now():
        return {"error": "Reminder time is in the past"}

    # Persist the reminder
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    cur.execute(
        '''INSERT INTO reminders (user_id, channel_id, message, remind_at, active)
                    VALUES (?, ?, ?, ?, 1)''',
        (user_id, channel_id, message, remind_at.isoformat()),
    )
    new_id = cur.lastrowid
    conn.commit()
    conn.close()

    # Schedule via OpenClaw cron (will be handled by caller)
    return {
        "id": new_id,
        "message": message,
        "remind_at": remind_at.isoformat(),
        "user_id": user_id,
        "channel_id": channel_id,
    }
|
||||
|
||||
|
||||
def list_reminders(user_id: str = None) -> list:
    """Fetch active, still-pending reminders, optionally for one user.

    Row shape is (id, message, remind_at, channel_id[, user_id]); the
    user_id column is present only in the all-users query.

    NOTE(review): remind_at is stored as a local-time isoformat string while
    SQLite's datetime('now') is UTC — possible timezone skew; confirm.
    """
    init_db()

    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()

    if user_id:
        cur.execute('''SELECT id, message, remind_at, channel_id
                        FROM reminders
                        WHERE user_id = ? AND active = 1 AND remind_at > datetime('now')
                        ORDER BY remind_at''', (user_id,))
    else:
        cur.execute('''SELECT id, message, remind_at, channel_id, user_id
                        FROM reminders
                        WHERE active = 1 AND remind_at > datetime('now')
                        ORDER BY remind_at''')

    rows = cur.fetchall()
    conn.close()
    return rows
|
||||
|
||||
|
||||
def delete_reminder(reminder_id: int, user_id: str = None) -> bool:
    """Remove a reminder row; when *user_id* is given it must also match.

    Returns True only if a row was actually deleted.
    """
    init_db()

    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()

    if not user_id:
        # No ownership check requested
        cur.execute('DELETE FROM reminders WHERE id = ?', (reminder_id,))
    else:
        cur.execute('DELETE FROM reminders WHERE id = ? AND user_id = ?',
                    (reminder_id, user_id))

    removed = cur.rowcount > 0
    conn.commit()
    conn.close()
    return removed
|
||||
|
||||
|
||||
def delete_past_reminders():
    """Purge reminders whose fire time is more than one day in the past."""
    init_db()
    conn = sqlite3.connect(DB_PATH)
    # Connection.execute creates a cursor implicitly — same effect as before.
    conn.execute('DELETE FROM reminders WHERE remind_at < datetime("now", "-1 day")')
    conn.commit()
    conn.close()
|
||||
|
||||
|
||||
def cron_callback(reminder_id: int):
    """Called when a cron job fires - returns the reminder details.

    Looks up the reminder, marks it inactive (one-shot semantics: it cannot
    fire twice), and returns {message, user_id, channel_id}.
    Returns None if the id is unknown.
    """
    init_db()

    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    c.execute('SELECT message, user_id, channel_id FROM reminders WHERE id = ?',
              (reminder_id,))
    result = c.fetchone()

    if result:
        # Mark as inactive after firing so list/cleanup skip it from now on
        c.execute('UPDATE reminders SET active = 0 WHERE id = ?', (reminder_id,))
        conn.commit()

    conn.close()

    # Build the response after the connection is closed
    if result:
        return {
            "message": result[0],
            "user_id": result[1],
            "channel_id": result[2]
        }
    return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Simple CLI dispatch: the first argv token selects the subcommand.
    command = sys.argv[1] if len(sys.argv) > 1 else "help"

    if command == "add":
        # Usage: reminder-manager.py add "user_id" "channel_id" "message" "time"
        user_id = sys.argv[2]
        channel_id = sys.argv[3]
        message = sys.argv[4]
        time_str = sys.argv[5]

        result = add_reminder(user_id, channel_id, message, time_str)
        # JSON on stdout: machine-readable for the PowerShell wrapper
        print(json.dumps(result))

    elif command == "list":
        user_id = sys.argv[2] if len(sys.argv) > 2 else None
        reminders = list_reminders(user_id)
        # Per-user rows have 4 columns; all-user rows carry a 5th (user_id)
        print(json.dumps([{
            "id": r[0],
            "message": r[1],
            "remind_at": r[2],
            "channel_id": r[3],
            "user_id": r[4] if len(r) > 4 else None
        } for r in reminders]))

    elif command == "delete":
        reminder_id = int(sys.argv[2])
        user_id = sys.argv[3] if len(sys.argv) > 3 else None
        deleted = delete_reminder(reminder_id, user_id)
        print(json.dumps({"deleted": deleted}))

    elif command == "callback":
        # Invoked by the scheduler when a reminder fires
        reminder_id = int(sys.argv[2])
        result = cron_callback(reminder_id)
        print(json.dumps(result) if result else "null")

    elif command == "cleanup":
        delete_past_reminders()
        print("Cleanup complete")

    else:
        # Unknown command (or "help"): print usage
        print("""Usage:
  reminder-manager.py add "user_id" "channel_id" "message" "time"
  reminder-manager.py list [user_id]
  reminder-manager.py delete <reminder_id> [user_id]
  reminder-manager.py callback <reminder_id>
  reminder-manager.py cleanup
""")
|
||||
111
tools/rom-full-scan.py
Normal file
111
tools/rom-full-scan.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import os
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
ROM_ROOT = 'R:\\'
|
||||
OUTPUT_DIR = Path('C:/Users/admin/.openclaw/workspace/rom-inventory')
|
||||
HASH_LIMIT = 100 * 1024 * 1024
|
||||
|
||||
def get_file_hash(filepath, limit_bytes=None):
    """Return the MD5 hex digest of *filepath*, or None on a read error.

    If *limit_bytes* is given, only the first limit_bytes of the file are
    hashed (a cheap fingerprint for large files).
    """
    hasher = hashlib.md5()
    try:
        with open(filepath, 'rb') as f:
            remaining = limit_bytes
            while True:
                # Read in 8 KiB chunks so a large limit (e.g. 100 MB) is not
                # buffered in a single read() call, as the original did.
                size = 8192 if remaining is None else min(8192, remaining)
                if size <= 0:
                    break
                chunk = f.read(size)
                if not chunk:
                    break
                hasher.update(chunk)
                if remaining is not None:
                    remaining -= len(chunk)
        return hasher.hexdigest()
    except OSError:
        # Unreadable/locked files are expected during a bulk scan; signal
        # with None instead of catching every exception type.
        return None
|
||||
|
||||
def scan_roms():
    """Walk the organized ROM tree, collecting per-file metadata and MD5 hashes.

    Expected layout: <ROM_ROOT>/Rom Sets (Organized)/<manufacturer>/<system>/...
    Aggregates counts/sizes per manufacturer and per system, hashes files up
    to HASH_LIMIT bytes to find duplicates, writes a JSON report into
    OUTPUT_DIR, and returns the stats dict.
    """
    print(f'Starting full ROM scan at {datetime.now()}')
    print(f'Root: {ROM_ROOT}')

    all_files = []
    hash_map = {}  # md5 hex digest -> list of file paths sharing that hash
    stats = {
        'total_files': 0,
        'total_size': 0,
        'hashed_files': 0,
        'skipped_large': 0,
        'errors': 0,
        'by_system': {},
        'by_manufacturer': {}
    }

    organized_path = Path(ROM_ROOT) / 'Rom Sets (Organized)'
    if organized_path.exists():
        for manufacturer_dir in organized_path.iterdir():
            if manufacturer_dir.is_dir():
                manufacturer = manufacturer_dir.name
                stats['by_manufacturer'][manufacturer] = {'files': 0, 'size': 0}

                for system_dir in manufacturer_dir.iterdir():
                    if system_dir.is_dir():
                        system = system_dir.name
                        # The same system name may appear under several manufacturers
                        if system not in stats['by_system']:
                            stats['by_system'][system] = {'files': 0, 'size': 0}

                        for root, dirs, files in os.walk(system_dir):
                            for f in files:
                                filepath = Path(root) / f
                                try:
                                    size = filepath.stat().st_size
                                    all_files.append({
                                        'path': str(filepath),
                                        'name': f,
                                        'size': size,
                                        'system': system,
                                        'manufacturer': manufacturer
                                    })
                                    stats['total_files'] += 1
                                    stats['total_size'] += size
                                    stats['by_manufacturer'][manufacturer]['files'] += 1
                                    stats['by_manufacturer'][manufacturer]['size'] += size
                                    stats['by_system'][system]['files'] += 1
                                    stats['by_system'][system]['size'] += size

                                    # Hash only files up to HASH_LIMIT; larger
                                    # images are counted but skipped for speed.
                                    if size <= HASH_LIMIT:
                                        file_hash = get_file_hash(filepath)
                                        if file_hash:
                                            all_files[-1]['hash'] = file_hash
                                            stats['hashed_files'] += 1
                                            if file_hash not in hash_map:
                                                hash_map[file_hash] = []
                                            hash_map[file_hash].append(str(filepath))
                                    else:
                                        stats['skipped_large'] += 1
                                except Exception as e:
                                    # Unreadable/odd files: count and continue
                                    stats['errors'] += 1

    # Any hash seen more than once is a duplicate set
    duplicates = {h: files for h, files in hash_map.items() if len(files) > 1}

    print(f'\nScan complete at {datetime.now()}')
    print(f'Total files: {stats["total_files"]:,}')
    print(f'Total size: {stats["total_size"] / (1024**3):.2f} GB')
    print(f'Hashed: {stats["hashed_files"]:,}')
    print(f'Skipped (large): {stats["skipped_large"]:,}')
    print(f'Duplicates found: {len(duplicates)}')

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    output = {
        'scan_date': datetime.now().isoformat(),
        'stats': stats,
        'duplicates': duplicates,
        # Cap the per-file list so the JSON report stays a manageable size
        'files': all_files[:10000]
    }

    with open(OUTPUT_DIR / 'rom-full-scan.json', 'w') as f:
        json.dump(output, f, indent=2)

    print(f'\nResults saved to {OUTPUT_DIR / "rom-full-scan.json"}')
    return stats
|
||||
|
||||
# Allow running as a standalone script
if __name__ == '__main__':
    scan_roms()
|
||||
156
tools/rom-inventory.py
Normal file
156
tools/rom-inventory.py
Normal file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ROM Library Inventory Scanner"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
# Scan root. NOTE: a raw string literal cannot end in a backslash
# (r"R:\" is a SyntaxError), so the backslash is escaped instead.
ROOT = "R:\\"
OUTPUT_DIR = r"C:\Users\admin\.openclaw\workspace\rom-inventory"
|
||||
|
||||
def ensure_output_dir():
    """Create the report output directory if it does not exist yet."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
def get_file_hash(filepath, block_size=65536):
    """Return the MD5 hex digest of *filepath*, or None if it can't be read.

    Reads in *block_size* chunks so large files don't load into memory.
    """
    try:
        hasher = hashlib.md5()
        with open(filepath, "rb") as f:
            for block in iter(lambda: f.read(block_size), b""):
                hasher.update(block)
        return hasher.hexdigest()
    except OSError:
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit;
        # only I/O failures should map to None.
        return None
|
||||
|
||||
def get_file_size(filepath):
    """Return the size of *filepath* in bytes, or 0 if it cannot be stat'ed."""
    try:
        return os.path.getsize(filepath)
    except OSError:
        # A bare `except:` would also hide KeyboardInterrupt/SystemExit.
        return 0
|
||||
|
||||
# File extensions recognized as ROM images or ROM archives.
ROM_EXTENSIONS = {
    ".sfc", ".nes", ".z64", ".v64", ".n64", ".gba", ".gbc", ".gb",
    ".nds", ".3ds", ".iso", ".bin", ".cue", ".cso", ".pbp",
    ".rom", ".md", ".sms", ".gg", ".sg", ".col", ".vec",
    ".a26", ".a52", ".a78", ".lnx", ".j64", ".jag",
    ".pce", ".ngp", ".ngc", ".ws", ".wsc", ".vb",
    ".zip", ".7z", ".rar"
}

def is_rom_file(filename):
    """True when the filename's extension (case-insensitive) is a known ROM type."""
    suffix = Path(filename).suffix.lower()
    return suffix in ROM_EXTENSIONS
|
||||
|
||||
def scan_console(console_path, console_name):
    """Recursively scan one console folder for ROM files.

    Returns a summary dict with the file count, total size, and (at most
    100) per-file entries; files under 100 MB also get an MD5 hash so the
    caller can detect duplicates.
    """
    roms = []
    total_size = 0
    file_count = 0

    console_path = Path(console_path)
    if not console_path.exists():
        return {"console": console_name, "error": "Path not found"}

    for root, dirs, files in os.walk(console_path):
        for file in files:
            if is_rom_file(file):
                filepath = Path(root) / file
                size = get_file_size(filepath)
                total_size += size
                file_count += 1

                # Only hash files under 100 MB — hashing disc images is too slow
                file_hash = None
                if size < 100 * 1024 * 1024:
                    file_hash = get_file_hash(filepath)

                roms.append({
                    "name": file,
                    "path": str(filepath),
                    "size": size,
                    "hash": file_hash
                })

    return {
        "console": console_name,
        "file_count": file_count,
        "total_size_bytes": total_size,
        "total_size_gb": round(total_size / (1024**3), 2),
        # Cap per-console detail to keep the JSON report small
        "roms": roms[:100]
    }
|
||||
|
||||
def scan_all():
    """Scan the organized ROM library, find duplicate hashes, and write a JSON report.

    Walks Rom Sets (Organized)/<manufacturer>/<console>, delegating each
    console folder to scan_console(). Returns the full results dict, which
    is also written to a timestamped file in OUTPUT_DIR.
    """
    ensure_output_dir()

    organized_path = Path(ROOT) / "Rom Sets (Organized)"

    results = {
        "scan_date": datetime.now().isoformat(),
        "root": ROOT,
        "organized": {},
        "duplicates": [],
        "summary": {}
    }

    print("Scanning Rom Sets (Organized)...")
    for manufacturer in organized_path.iterdir():
        if manufacturer.is_dir():
            print(f"  {manufacturer.name}...")
            results["organized"][manufacturer.name] = {}
            for console in manufacturer.iterdir():
                if console.is_dir():
                    console_data = scan_console(console, console.name)
                    results["organized"][manufacturer.name][console.name] = console_data
                    # Inner quotes must differ from the f-string's own quotes
                    # (same-quote nesting is a SyntaxError before Python 3.12).
                    print(f"    {console.name}: {console_data.get('file_count', 0)} files")

    # Group hashed ROMs by digest; any digest with >1 file is a duplicate set.
    print("\nFinding duplicates...")
    all_hashes = defaultdict(list)
    for manufacturer, consoles in results["organized"].items():
        for console, data in consoles.items():
            if "roms" in data:
                for rom in data["roms"]:
                    if rom.get("hash"):
                        all_hashes[rom["hash"]].append({
                            "name": rom["name"],
                            "console": console,
                            "path": rom["path"]
                        })

    results["duplicates"] = [
        {"hash": h, "files": files}
        for h, files in all_hashes.items()
        if len(files) > 1
    ]

    total_files = sum(
        data.get("file_count", 0)
        for consoles in results["organized"].values()
        for data in consoles.values()
    )
    total_size = sum(
        data.get("total_size_bytes", 0)
        for consoles in results["organized"].values()
        for data in consoles.values()
    )

    results["summary"] = {
        "total_roms": total_files,
        "total_size_gb": round(total_size / (1024**3), 2),
        "duplicate_count": len(results["duplicates"])
    }

    output_file = Path(OUTPUT_DIR) / f"rom-inventory-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
    with open(output_file, "w") as f:
        json.dump(results, f, indent=2)

    # The original embedded literal newlines inside string literals
    # (a SyntaxError); use "\n" escapes instead.
    print("\n=== INVENTORY COMPLETE ===")
    print(f"Total ROMs: {total_files:,}")
    print(f"Total Size: {results['summary']['total_size_gb']:.2f} GB")
    print(f"Duplicates Found: {len(results['duplicates'])}")
    print(f"Output: {output_file}")

    return results
|
||||
|
||||
# Allow running as a standalone script
if __name__ == "__main__":
    scan_all()
|
||||
53
tools/rom-quick-scan.py
Normal file
53
tools/rom-quick-scan.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
ROOT = "R:/"
|
||||
OUTPUT = "C:/Users/admin/.openclaw/workspace/rom-inventory"
|
||||
|
||||
DISC = {"PSX", "PS2", "PS3", "PSP", "PS VITA", "Sega CD", "Sega Dreamcast", "Sega Saturn", "OG XBOX", "XBOX 360"}
|
||||
EXTS = {".sfc", ".nes", ".z64", ".v64", ".n64", ".gba", ".gbc", ".gb", ".nds", ".3ds", ".iso", ".bin", ".cue", ".md", ".sms", ".gg", ".zip"}
|
||||
|
||||
os.makedirs(OUTPUT, exist_ok=True)
|
||||
|
||||
def count_folders(p):
    """Count non-hidden immediate subdirectories of *p*."""
    visible_dirs = [child for child in Path(p).iterdir()
                    if child.is_dir() and not child.name.startswith(".")]
    return len(visible_dirs)
|
||||
|
||||
def count_files(p):
    """Count ROM files under *p* (recursively) and sum their sizes.

    Returns (count, total_bytes). Files whose size cannot be read are
    still counted but contribute 0 bytes.
    """
    c, s = 0, 0
    for r, d, f in os.walk(p):
        for x in f:
            if Path(x).suffix.lower() in EXTS:
                c += 1
                try:
                    s += os.path.getsize(Path(r) / x)
                except OSError:
                    # Bare `except:` would also swallow KeyboardInterrupt;
                    # only I/O failures should be ignored here.
                    pass
    return c, s
|
||||
|
||||
# --- Main scan -------------------------------------------------------------
# Walk Rom Sets (Organized)/<manufacturer>/<console>. Consoles whose name
# matches a DISC system are counted by folder (one game per folder);
# everything else is counted by ROM file.
org = Path(ROOT) / "Rom Sets (Organized)"
data = {"date": datetime.now().isoformat(), "consoles": {}}
total = 0
size = 0

for mfr in org.iterdir():
    if mfr.is_dir():
        print(mfr.name)
        for con in mfr.iterdir():
            if con.is_dir():
                # Substring match against DISC names decides disc vs cart
                if any(d in con.name for d in DISC):
                    n = count_folders(con)
                    data["consoles"][con.name] = {"type": "disc", "count": n}
                else:
                    n, s = count_files(con)
                    data["consoles"][con.name] = {"type": "cart", "count": n, "size_gb": round(s/1e9, 2)}
                    size += s
                total += n

data["total"] = total
data["size_gb"] = round(size/1e9, 2)

# Persist the inventory as JSON
out = Path(OUTPUT) / "rom-inventory.json"
with open(out, "w") as f:
    json.dump(data, f, indent=2)
print(f"Done: {total} items, {data['size_gb']} GB")
print(f"Saved: {out}")
|
||||
102
tools/search_memories.py
Normal file
102
tools/search_memories.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Memory Vector Search CLI
|
||||
========================
|
||||
Search memories using semantic similarity.
|
||||
|
||||
Usage:
|
||||
python search_memories.py "your query here"
|
||||
python search_memories.py --interactive
|
||||
"""
|
||||
|
||||
import sys
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import requests
|
||||
from memory_vector import search_memories
|
||||
|
||||
OLLAMA_URL = "http://localhost:11434"
|
||||
EMBED_MODEL = "nomic-embed-text"
|
||||
|
||||
|
||||
def get_embedding(text: str) -> list:
    """Embed *text* via the local Ollama embeddings endpoint."""
    payload = {"model": EMBED_MODEL, "prompt": text}
    resp = requests.post(f"{OLLAMA_URL}/api/embeddings", json=payload, timeout=30)
    resp.raise_for_status()
    body = resp.json()
    return body["embedding"]
|
||||
|
||||
|
||||
def search(query: str, k: int = 5):
    """Embed *query*, run a k-NN search over stored memories, and print results.

    Output goes to stdout; nothing is returned.
    """
    print(f"Searching: '{query}'\n")

    # Generate embedding for query via the local Ollama server
    print("Generating embedding...")
    query_embedding = get_embedding(query)

    # Search database — results are (path, content, distance) tuples
    print(f"Searching {k} nearest neighbors...\n")
    results = search_memories(query_embedding, k=k)

    if not results:
        print("No results found.")
        return

    print("=" * 60)
    print("RESULTS")
    print("=" * 60)

    for i, (path, content, distance) in enumerate(results, 1):
        # Clean up unicode for console output (drops non-ASCII characters)
        path_clean = path.encode('ascii', 'ignore').decode() if path else "Unknown"
        content_clean = content.encode('ascii', 'ignore').decode() if content else ""
        print(f"\n{i}. [{path_clean}]")
        print(f"   Distance: {distance:.4f}")
        print(f"   Content: {content_clean[:150]}...")

    print("\n" + "=" * 60)
    print(f"Found {len(results)} result(s)")
|
||||
|
||||
|
||||
def interactive():
    """Run a prompt loop: search each entered query until the user quits."""
    print("Memory Vector Search")
    print("Type 'quit' to exit\n")

    while True:
        query = input("Search: ").strip()
        if query.lower() in ('quit', 'exit', 'q'):
            return
        if query:
            try:
                search(query)
            except Exception as e:
                print(f"Error: {e}")
            print()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse args and dispatch to one-shot or interactive search."""
    import argparse

    parser = argparse.ArgumentParser(description='Search memories with vector similarity')
    parser.add_argument('query', nargs='?', help='Search query')
    parser.add_argument('-n', '--num', type=int, default=5, help='Number of results (default: 5)')
    parser.add_argument('-i', '--interactive', action='store_true', help='Interactive mode')

    args = parser.parse_args()

    # No query on the command line falls back to interactive mode
    if args.interactive or not args.query:
        interactive()
    else:
        search(args.query, k=args.num)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
263
tools/session_monitor.py
Normal file
263
tools/session_monitor.py
Normal file
@@ -0,0 +1,263 @@
|
||||
"""
|
||||
Session Monitor
|
||||
===============
|
||||
Automatically tracks conversation transcripts and captures snapshots.
|
||||
Runs via cron every 2 minutes.
|
||||
|
||||
Reads OpenClaw session transcripts from:
|
||||
~/.openclaw/agents/main/sessions/*.jsonl
|
||||
|
||||
Usage:
|
||||
python session_monitor.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from memory_vector import store_memory
|
||||
|
||||
DB_PATH = os.path.expanduser("~/.openclaw/memory.db")
|
||||
SESSIONS_DIR = os.path.expanduser("~/.openclaw/agents/main/sessions")
|
||||
SNAPSHOT_THRESHOLD = 15 # Messages between snapshots
|
||||
|
||||
def get_db():
    """Open the tracking database with dict-like (sqlite3.Row) row access."""
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
def find_session_files():
    """Return session transcript files, most recently modified first.

    Each entry is {'session_id', 'path', 'mtime'}; session_id is the
    filename without its extension.
    """
    if not os.path.exists(SESSIONS_DIR):
        return []

    entries = [
        {
            'session_id': jsonl.stem,  # filename without extension
            'path': str(jsonl),
            'mtime': os.path.getmtime(jsonl),
        }
        for jsonl in Path(SESSIONS_DIR).glob("*.jsonl")
    ]
    return sorted(entries, key=lambda e: e['mtime'], reverse=True)
|
||||
|
||||
def parse_transcript(filepath, start_index=0):
    """
    Parse transcript file and extract user messages.

    Reads a JSONL transcript, skipping lines before *start_index*, and
    collects the text of entries where message.role == 'user'.
    Returns (messages, total_lines); each message is a dict with
    'index' (file-relative line number), 'timestamp', and 'text'.
    """
    messages = []

    if not os.path.exists(filepath):
        return messages, 0

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        print(f"[ERROR] Cannot read {filepath}: {e}")
        return messages, 0

    total_lines = len(lines)

    # Process lines from start_index onwards; enumerate's start keeps the
    # indices file-relative so they can be persisted as a resume point.
    for i, line in enumerate(lines[start_index:], start=start_index):
        if not line.strip():
            continue

        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            # Tolerate partially-written lines (the file is appended live)
            continue

        # Only count user messages
        if entry.get('type') == 'message':
            msg = entry.get('message', {})
            if msg.get('role') == 'user':
                content = msg.get('content', [])
                # Extract text content; parts may be dicts or bare strings
                text_parts = []
                for part in content:
                    if isinstance(part, dict) and part.get('type') == 'text':
                        text_parts.append(part.get('text', ''))
                    elif isinstance(part, str):
                        text_parts.append(part)

                if text_parts:
                    messages.append({
                        'index': i,
                        'timestamp': entry.get('timestamp'),
                        'text': ' '.join(text_parts)
                    })

    return messages, total_lines
|
||||
|
||||
def generate_summary(messages):
    """Join message texts (pipe-separated) and cap the result at 1000 chars.

    Returns None when there are no messages.
    """
    if not messages:
        return None

    texts = (m['text'] for m in messages)
    combined = ' | '.join(texts)
    # Slicing already leaves shorter strings unchanged, so no length check needed.
    return combined[:1000]
|
||||
|
||||
def process_session(session_info):
    """
    Process a single session.

    Tracks how far into the transcript we have read (session_tracking
    table), parses any new user messages, and creates an embedded snapshot
    once SNAPSHOT_THRESHOLD new messages have accumulated.
    Returns (snapshot_created, message_count).
    """
    conn = get_db()
    cursor = conn.cursor()

    session_id = session_info['session_id']
    filepath = session_info['path']

    # Get or create tracking record
    cursor.execute('''
        SELECT * FROM session_tracking WHERE session_id = ?
    ''', (session_id,))

    tracking = cursor.fetchone()

    if tracking is None:
        # New session: start reading from the top of the transcript
        cursor.execute('''
            INSERT INTO session_tracking
            (session_id, transcript_path, last_message_index, messages_since_snapshot)
            VALUES (?, ?, 0, 0)
        ''', (session_id, filepath))
        conn.commit()
        last_index = 0
        since_snapshot = 0
    else:
        last_index = tracking['last_message_index']
        since_snapshot = tracking['messages_since_snapshot']

    # Parse new messages (only lines past the last processed index)
    messages, total_lines = parse_transcript(filepath, last_index)

    if not messages:
        conn.close()
        return False, 0

    new_count = len(messages)
    since_snapshot += new_count

    print(f"  [{session_id[:8]}...]: {new_count} new messages, {since_snapshot} since snapshot")

    snapshot_created = False

    # Check if threshold reached
    if since_snapshot >= SNAPSHOT_THRESHOLD:
        # Summarize only the most recent threshold-sized window
        summary = generate_summary(messages[-SNAPSHOT_THRESHOLD:])

        if summary and len(summary) > 50:  # Only snapshot if substantive
            try:
                # Get embedding from the local Ollama server
                import requests
                response = requests.post(
                    "http://localhost:11434/api/embeddings",
                    json={"model": "nomic-embed-text", "prompt": summary[:2000]},
                    timeout=30
                )
                embedding = response.json()["embedding"]

                # Store in database; the fragment encodes the snapshot time
                source_path = f"session://{session_id}#{datetime.now().strftime('%H:%M')}"
                store_memory(
                    source_type="auto_session",
                    source_path=source_path,
                    content=summary,
                    embedding=embedding
                )

                print(f"  [OK] Snapshot saved: {source_path}")
                since_snapshot = 0
                snapshot_created = True

            except Exception as e:
                # Non-fatal: the counter is NOT reset here, so a failed
                # snapshot is retried on the next run
                print(f"  [ERROR] Failed to create snapshot: {e}")
        else:
            print(f"  [SKIP] Content too short for snapshot")
            since_snapshot = 0  # Reset anyway to avoid getting stuck

    # Update tracking: advance past all parsed lines, persist the counter
    cursor.execute('''
        UPDATE session_tracking
        SET last_message_index = ?,
            messages_since_snapshot = ?,
            last_checkpoint_time = CURRENT_TIMESTAMP,
            transcript_path = ?
        WHERE session_id = ?
    ''', (total_lines, since_snapshot, filepath, session_id))

    conn.commit()
    conn.close()

    return snapshot_created, new_count
|
||||
|
||||
def cleanup_old_sessions():
    """Delete session transcript files not modified within the last 24 hours.

    Returns the number of files removed.
    """
    cutoff_time = datetime.now().timestamp() - 24 * 3600
    removed = 0

    for transcript in Path(SESSIONS_DIR).glob("*.jsonl"):
        if transcript.stat().st_mtime >= cutoff_time:
            continue
        try:
            transcript.unlink()
        except Exception as e:
            print(f"  [CLEANUP] Failed to delete {transcript.name}: {e}")
        else:
            removed += 1
            print(f"  [CLEANUP] Deleted: {transcript.name}")

    return removed
|
||||
|
||||
def main():
    """Main entry point.

    Scans all session transcripts, snapshots any that crossed the message
    threshold, then prunes transcripts older than 24 hours.
    """
    print(f"\n{'='*60}")
    print("Session Monitor")
    print(f"{'='*60}")
    print(f"Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    # Find session files (newest first)
    sessions = find_session_files()

    if not sessions:
        print("[INFO] No active session files found")
        return

    print(f"Found {len(sessions)} session file(s)")
    print()

    total_snapshots = 0
    total_messages = 0

    for session in sessions:
        snapshot, count = process_session(session)
        total_messages += count
        if snapshot:
            total_snapshots += 1

    # Cleanup old sessions
    deleted = cleanup_old_sessions()

    print()
    print(f"{'='*60}")
    print(f"Summary: {total_messages} messages, {total_snapshots} snapshots, {deleted} files cleaned")
    print(f"{'='*60}\n")
|
||||
|
||||
# Run once per invocation; scheduled externally (cron every 2 minutes)
if __name__ == "__main__":
    main()
|
||||
55
tools/session_snapshotter.py
Normal file
55
tools/session_snapshotter.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""
|
||||
Session Memory Snapshotter
|
||||
==========================
|
||||
Auto-summarizes conversation and embeds it.
|
||||
Called every 15 messages.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from datetime import datetime
|
||||
from memory_vector import store_memory
|
||||
import requests
|
||||
|
||||
OLLAMA_URL = "http://localhost:11434"
|
||||
EMBED_MODEL = "nomic-embed-text"
|
||||
|
||||
def get_embedding(text: str) -> list:
    """Generate an embedding for *text* via the local Ollama server.

    Only the first 2000 characters of *text* are sent.
    """
    payload = {"model": EMBED_MODEL, "prompt": text[:2000]}
    resp = requests.post(f"{OLLAMA_URL}/api/embeddings", json=payload, timeout=30)
    resp.raise_for_status()
    return resp.json()["embedding"]
|
||||
|
||||
def save_snapshot(summary: str, participants: str = "Corey, Alex"):
    """Save a conversation snapshot with embedding.

    Embeds *summary* via Ollama and stores it under a "session://" source
    path keyed by today's date and time. Returns the source path.

    NOTE(review): *participants* is never used in this function — confirm
    whether it should be stored alongside the snapshot.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")

    # Generate embedding
    embedding = get_embedding(summary)

    # Store in database
    source_path = f"session://{datetime.now().strftime('%Y-%m-%d')}#{timestamp}"
    store_memory(
        source_type="session_snapshot",
        source_path=source_path,
        content=summary,
        embedding=embedding
    )

    return source_path
|
||||
|
||||
if __name__ == "__main__":
    # Called with summary as argument
    if len(sys.argv) < 2:
        print("Usage: python session_snapshotter.py 'summary text'")
        sys.exit(1)

    summary = sys.argv[1]
    path = save_snapshot(summary)
    print(f"[OK] Snapshot saved: {path}")
|
||||
369
tools/shipping_tracker.py
Normal file
369
tools/shipping_tracker.py
Normal file
@@ -0,0 +1,369 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Shipping Tracker - Checks Proton Mail for tracking emails"""
|
||||
|
||||
import socket
|
||||
import sqlite3
|
||||
import re
|
||||
import json
|
||||
import os
|
||||
from email import policy
|
||||
from email.parser import BytesParser
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Config
# NOTE(review): IMAP credentials are hard-coded here (and again in
# shipping_tracker_cron.py); move them to environment variables or a
# secrets store. Host/port presumably point at a local Proton Mail
# bridge -- confirm.
IMAP_HOST = "127.0.0.1"
IMAP_PORT = 1143
USERNAME = "alexthenerdyai@proton.me"
PASSWORD = "8yiNBTJBMc6HyOQjIZKjMw"
# SQLite database that records shipment emails already seen.
DB_PATH = os.path.expanduser("~/.openclaw/workspace/data/shipping.db")
# Channel id written into shipping_output.json for the Discord notifier.
CHANNEL_ID = "1473701182076752135"
||||
|
||||
# Tracking patterns
# Per-carrier detection table:
#   patterns - regexes matching that carrier's tracking-number formats
#   domains  - sender email domains that identify the carrier
#   name     - display name used in notifications and DB rows
# NOTE(review): the bare-digit patterns (FedEx/DHL/Amazon) are loose and
# can match unrelated numbers; dict order decides which carrier wins in
# detect_carrier()/extract_tracking().
CARRIERS = {
    'ups': {
        'patterns': [r'1Z[A-Z0-9]{16}', r'\b1Z[0-9A-Z]{16}\b'],
        'domains': ['ups.com', 'email.ups.com'],
        'name': 'UPS'
    },
    'fedex': {
        'patterns': [r'\b\d{12}\b', r'\b\d{14}\b', r'\b\d{20}\b'],
        'domains': ['fedex.com', 'email.fedex.com'],
        'name': 'FedEx'
    },
    'usps': {
        'patterns': [r'\b\d{20,22}\b', r'\b9\d{15,21}\b', r'\b[A-Z]{2}\d{9}[A-Z]{2}\b'],
        'domains': ['usps.com', 'email.usps.gov'],
        'name': 'USPS'
    },
    'dhl': {
        'patterns': [r'\b\d{10}\b', r'\b\d{11}\b'],
        'domains': ['dhl.com', 'dhl-usa.com'],
        'name': 'DHL'
    },
    'amazon': {
        'patterns': [r'\b\d{12,14}\b'],
        'domains': ['amazon.com', 'amazon.ca', 'amazon.co.uk'],
        'name': 'Amazon Logistics'
    },
    'ontrac': {
        'patterns': [r'\bC\d{14}\b', r'\bD\d{14}\b'],
        'domains': ['ontrac.com'],
        'name': 'OnTrac'
    },
    'lasership': {
        'patterns': [r'\b1LS\d{12}\b', r'\bLX\d{8,12}\b'],
        'domains': ['lasership.com'],
        'name': 'LaserShip'
    }
}
|
||||
|
||||
# Date patterns for estimated delivery
# Tried in order against the lowercased body; the first match's full
# matched text is returned by extract_delivery_date().
DATE_PATTERNS = [
    # "arriving/delivery ... [weekday,] Month DD, YYYY"
    r'(?:arriving|delivery|delivered by|estimated delivery)[\s:]*(?:on|by)?\s*(monday|tuesday|wednesday|thursday|friday|saturday|sunday)?[,\s]*([a-z]+)\s+(\d{1,2})[,\s]+(\d{4})',
    # "arrives/by Month DD"
    r'(?:arrives|delivery on|by)\s+(\w+)\s+(\d{1,2})',
    # "expected/estimated MM/DD/YY(YY)" with / or - separators
    r'(?:expected|estimated|delivery)[\s:]*(\d{1,2})[\/\-](\d{1,2})[\/\-](\d{2,4})',
    # "will arrive on Weekday, Month DD"
    r'(?:will arrive|delivered)\s+(?:on\s+)?(\w+day),?\s*(\w+)\s*(\d{1,2})',
]
|
||||
|
||||
def init_db():
    """Create the shipping database (and parent directory) if missing."""
    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
    db = sqlite3.connect(DB_PATH)
    db.execute('''
        CREATE TABLE IF NOT EXISTS shipments (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT UNIQUE,
            sender TEXT,
            sender_email TEXT,
            subject TEXT,
            carrier TEXT,
            tracking_number TEXT,
            estimated_delivery TEXT,
            tracking_url TEXT,
            received_at TEXT,
            notified_at TEXT,
            delivered INTEGER DEFAULT 0
        )
    ''')
    db.commit()
    db.close()
|
||||
|
||||
def get_response(sock, tag):
    """Accumulate raw IMAP reply bytes until *tag* appears or the peer closes."""
    wanted = tag.encode()
    buf = b""
    chunk = sock.recv(16384)
    while chunk:
        buf += chunk
        if wanted in buf:
            break
        chunk = sock.recv(16384)
    return buf
|
||||
|
||||
def extract_rfc822(data):
    """Pull the raw RFC822 message bytes out of an IMAP FETCH response.

    Collects the lines between the untagged FETCH header and the tagged
    OK completion line.
    """
    collected = []
    inside = False
    for raw in data.split(b'\r\n'):
        if b'FETCH' in raw and b'RFC822' in raw:
            inside = True
            continue
        # A tagged completion line ends the message body.
        if raw.startswith((b'A4 ', b'A5 ', b'A6 ', b'A7 ')) and b'OK' in raw:
            break
        if inside:
            collected.append(raw)
    return b'\r\n'.join(collected)
|
||||
|
||||
def strip_html(html):
    """Convert an HTML fragment to rough plain text.

    Drops <style>/<script> blocks, turns block-level closing tags into
    newlines, strips the remaining tags, and collapses whitespace.
    """
    # (fix: removed redundant function-local `import re`; the module
    # already imports re at the top of the file)
    # <style>/<script> contents are not visible text; remove wholesale.
    html = re.sub(r'<(style|script)[^>]*>[^<]*</\1>', ' ', html, flags=re.IGNORECASE | re.DOTALL)
    # Closing block tags mark line breaks in the rendered text.
    html = re.sub(r'</(p|div|h[1-6]|li|tr|br)>', '\n', html, flags=re.IGNORECASE)
    # Strip every remaining tag, then collapse whitespace runs.
    html = re.sub(r'<[^>]+>', ' ', html)
    html = re.sub(r'\s+', ' ', html)
    return html.strip()
|
||||
|
||||
def parse_email(msg):
    """Return the best-effort plain-text body of an email message.

    Plain-text parts are used verbatim; HTML parts are reduced to text
    via strip_html(). Undecodable bytes are dropped.
    """
    if not msg.is_multipart():
        payload = msg.get_payload(decode=True)
        return payload.decode('utf-8', errors='ignore') if payload else ''

    text = ''
    for part in msg.walk():
        ctype = part.get_content_type()
        if ctype not in ('text/plain', 'text/html'):
            continue
        payload = part.get_payload(decode=True)
        if not payload:
            continue
        decoded = payload.decode('utf-8', errors='ignore')
        text += decoded if ctype == 'text/plain' else strip_html(decoded)
    return text
|
||||
|
||||
def detect_carrier(from_email, body, subject):
    """Guess the carrier id from the sender's domain or the message text.

    Returns a CARRIERS key, or None when nothing matches. Dict order
    decides ties.
    """
    sender_domain = from_email.split('@')[-1].lower() if '@' in from_email else ''
    haystack = f"{subject} {body}".lower()

    for cid, meta in CARRIERS.items():
        # The sender domain is the strongest signal.
        if any(dom in sender_domain for dom in meta['domains']):
            return cid
        # Otherwise look for the carrier id or display name in the text.
        if cid in haystack or meta['name'].lower() in haystack:
            return cid

    return None
|
||||
|
||||
def extract_tracking(body, carrier_id):
    """Find a tracking number in *body*.

    Returns (tracking_number, carrier_id); either may be None. When the
    carrier is unknown, every carrier's patterns are tried in dict order
    and the first match also fixes the carrier.
    """
    if carrier_id in CARRIERS:
        for pat in CARRIERS[carrier_id]['patterns']:
            hit = re.search(pat, body, re.IGNORECASE)
            if hit:
                return hit.group(0), carrier_id
        return None, carrier_id

    # Unknown carrier: probe all patterns and infer the carrier from the hit.
    for cid, meta in CARRIERS.items():
        for pat in meta['patterns']:
            hit = re.search(pat, body, re.IGNORECASE)
            if hit:
                return hit.group(0), cid
    return None, None
|
||||
|
||||
def extract_delivery_date(body):
    """Return the first estimated-delivery phrase found in *body*, or None.

    Matches the module-level DATE_PATTERNS (written in lowercase) against
    the lowercased body and returns the whole matched text, stripped.
    """
    # (fix: removed the unused local `months` list -- it was defined but
    # never referenced)
    text = body.lower()
    for pattern in DATE_PATTERNS:
        match = re.search(pattern, text)
        if match:
            return match.group(0).strip()
    return None
|
||||
|
||||
def get_tracking_url(carrier, tracking_num):
    """Return the carrier's public tracking page URL, or None if unknown."""
    templates = {
        'ups': 'https://www.ups.com/track?tracknum={0}',
        'fedex': 'https://www.fedex.com/fedextrack/?trknbr={0}',
        'usps': 'https://tools.usps.com/go/TrackConfirmAction?tLabels={0}',
        'dhl': 'https://www.dhl.com/en/express/tracking.html?AWB={0}',
        'amazon': 'https://track.amazon.com/tracking/{0}',
        'ontrac': 'https://www.ontrac.com/tracking/?number={0}',
        'lasership': 'https://www.lasership.com/track/{0}'
    }
    template = templates.get(carrier)
    return template.format(tracking_num) if template else None
|
||||
|
||||
def process_message(sock, msg_num, conn):
    """Fetch message *msg_num* over IMAP, extract shipment info, persist it.

    Returns a shipment dict for a newly seen tracking email, or None when
    the message is unparseable, already recorded, or has no tracking number.
    """
    # Fetch the full RFC822 message.
    sock.send(f'A4 FETCH {msg_num} RFC822\r\n'.encode())
    resp = get_response(sock, "A4 OK")

    email_data = extract_rfc822(resp)
    if not email_data:
        return None

    try:
        msg = BytesParser(policy=policy.default).parsebytes(email_data)
    except Exception:
        # (fix: bare `except:` narrowed to Exception so KeyboardInterrupt/
        # SystemExit are not swallowed) Malformed message: skip it.
        return None

    message_id = msg.get('Message-ID', '').strip('<>')
    if not message_id:
        return None

    # Dedupe: skip messages already stored.
    cursor = conn.execute('SELECT 1 FROM shipments WHERE message_id = ?', (message_id,))
    if cursor.fetchone():
        return None

    # Parse sender/subject/body.
    sender_email = msg.get('From', '')
    sender_name = sender_email.split('<')[0].strip() if '<' in sender_email else sender_email
    subject = msg.get('Subject', '(No Subject)')
    body = parse_email(msg)

    # Identify the carrier and tracking number.
    carrier_id = detect_carrier(sender_email, body, subject)
    tracking_num, carrier_id = extract_tracking(body, carrier_id)

    if not tracking_num:
        return None

    est_delivery = extract_delivery_date(body)
    tracking_url = get_tracking_url(carrier_id, tracking_num)

    # Save to database.
    carrier_name = CARRIERS.get(carrier_id, {}).get('name', carrier_id.upper())

    conn.execute('''
        INSERT INTO shipments (message_id, sender, sender_email, subject, carrier,
                               tracking_number, estimated_delivery, tracking_url,
                               received_at, notified_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', (
        message_id, sender_name, sender_email, subject, carrier_name,
        tracking_num, est_delivery, tracking_url,
        datetime.now().isoformat(), datetime.now().isoformat()
    ))
    conn.commit()

    return {
        'sender': sender_name,
        'carrier': carrier_name,
        'tracking': tracking_num,
        'estimated': est_delivery or 'Unknown',
        'url': tracking_url,
        'subject': subject
    }
|
||||
|
||||
def format_notification(shipment):
    """Render a shipment dict as a Discord-markdown notification string."""
    parts = [
        f"\ud83d\ude9a **New Shipment Detected**",
        f"",
        f"**From:** {shipment['sender']}",
        f"**Carrier:** {shipment['carrier']}",
        f"**Tracking:** `{shipment['tracking']}`",
        f"**Est. Delivery:** {shipment['estimated']}",
    ]
    url = shipment['url']
    if url:
        parts.append(f"**Track:** <{url}>")
    return '\n'.join(parts)
|
||||
|
||||
def send_discord_notification(content, channel_id):
    """Placeholder notifier: logs a preview; real delivery happens
    externally (sessions_send, or a file picked up by cron)."""
    preview = content[:100]
    print(f"[NOTIFY] Channel {channel_id}: {preview}...")
    return True
|
||||
|
||||
def main():
    """Poll the IMAP inbox for unread mail and record any shipment emails.

    Returns the list of newly found shipment dicts; when non-empty the
    results are also written to shipping_output.json for the Discord layer.
    """
    init_db()
    conn = sqlite3.connect(DB_PATH)

    print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}] Checking for new shipments...")

    try:
        # Connect to IMAP
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(15)
        sock.connect((IMAP_HOST, IMAP_PORT))
        sock.recv(1024)  # discard the server greeting

        # Login
        sock.send(f'A1 LOGIN "{USERNAME}" "{PASSWORD}"\r\n'.encode())
        get_response(sock, "A1 OK")

        # Select INBOX
        sock.send(b'A2 SELECT "INBOX"\r\n')
        get_response(sock, "A2 OK")

        # Search unread messages
        sock.send(b'A3 SEARCH UNSEEN\r\n')
        resp = get_response(sock, "A3 OK").decode()

        # The untagged reply looks like "* SEARCH 1 2 3"; pull the numbers.
        msg_nums = []
        for line in resp.split('\r\n'):
            if 'SEARCH' in line and '*' in line:
                parts = line.split('SEARCH')
                if len(parts) > 1:
                    msg_nums = [n for n in parts[1].strip().split() if n.isdigit()]

        if not msg_nums:
            print("[INFO] No unread messages")
            # NOTE(review): conn is closed here and again in finally;
            # sqlite3 tolerates the double close, but one close suffices.
            conn.close()
            return []

        print(f"[INFO] {len(msg_nums)} unread messages to check")

        shipments = []
        for msg_num in msg_nums:
            shipment = process_message(sock, msg_num, conn)
            if shipment:
                shipments.append(shipment)
                print(f"[FOUND] {shipment['carrier']} - {shipment['tracking']}")

        # Logout
        sock.send(b'A5 LOGOUT\r\n')
        sock.close()

        # Output results for Discord
        if shipments:
            output = {'channel': CHANNEL_ID, 'shipments': shipments}
            output_path = os.path.expanduser('~/.openclaw/workspace/data/shipping_output.json')
            with open(output_path, 'w') as f:
                json.dump(output, f)
            print(f"[INFO] Found {len(shipments)} shipments, saved to {output_path}")
        else:
            print("[INFO] No new shipments found")

        return shipments

    except Exception as e:
        # Best-effort cron job: report the failure but exit cleanly.
        print(f"[ERR] {e}")
        import traceback
        traceback.print_exc()
        return []
    finally:
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the inbox check and print each notification.
    shipments = main()
    if shipments:
        for s in shipments:
            print("\n" + format_notification(s))
|
||||
231
tools/shipping_tracker_cron.py
Normal file
231
tools/shipping_tracker_cron.py
Normal file
@@ -0,0 +1,231 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Shipping Tracker with Discord notifications"""
|
||||
|
||||
import socket
|
||||
import sqlite3
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from email import policy
|
||||
from email.parser import BytesParser
|
||||
from datetime import datetime
|
||||
|
||||
# Config
# NOTE(review): IMAP credentials are hard-coded here (duplicated from
# shipping_tracker.py); move them to environment variables or a secrets
# store. Host/port presumably point at a local Proton Mail bridge.
IMAP_HOST = "127.0.0.1"
IMAP_PORT = 1143
USERNAME = "alexthenerdyai@proton.me"
PASSWORD = "8yiNBTJBMc6HyOQjIZKjMw"
# SQLite database shared with shipping_tracker.py.
DB_PATH = os.path.expanduser("~/.openclaw/workspace/data/shipping.db")
CHANNEL_ID = "1473701182076752135"

# Trimmed carrier table (fewer carriers/patterns than shipping_tracker.py):
# regexes for tracking numbers plus sender domains and display names.
CARRIERS = {
    'ups': {'patterns': [r'1Z[A-Z0-9]{16}'], 'domains': ['ups.com'], 'name': 'UPS'},
    'fedex': {'patterns': [r'\b\d{12,20}\b'], 'domains': ['fedex.com'], 'name': 'FedEx'},
    'usps': {'patterns': [r'\b\d{20,22}\b', r'\b9\d{15,21}\b'], 'domains': ['usps.com'], 'name': 'USPS'},
    'dhl': {'patterns': [r'\b\d{10,11}\b'], 'domains': ['dhl.com'], 'name': 'DHL'},
    'amazon': {'patterns': [r'\b\d{12,14}\b'], 'domains': ['amazon.com'], 'name': 'Amazon'},
}
|
||||
|
||||
def init_db():
    """Create the shipments table (and its data directory) if absent."""
    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
    db = sqlite3.connect(DB_PATH)
    db.execute('''CREATE TABLE IF NOT EXISTS shipments (
        id INTEGER PRIMARY KEY, message_id TEXT UNIQUE, sender TEXT, carrier TEXT,
        tracking_number TEXT, estimated_delivery TEXT, tracking_url TEXT,
        received_at TEXT, notified_at TEXT)''')
    db.commit()
    db.close()
|
||||
|
||||
def get_response(sock, tag):
    """Read raw IMAP reply bytes until *tag* is seen or the peer closes."""
    marker = tag.encode()
    received = b""
    while True:
        data = sock.recv(16384)
        if not data:
            break
        received += data
        if marker in received:
            break
    return received
|
||||
|
||||
def extract_rfc822(data):
    """Extract the raw message bytes from an IMAP FETCH response."""
    keep = []
    capturing = False
    for raw in data.split(b'\r\n'):
        if b'FETCH' in raw and b'RFC822' in raw:
            capturing = True
            continue
        # A tagged completion line terminates the message.
        if raw.startswith((b'A4 ', b'A5 ')) and b'OK' in raw:
            break
        if capturing:
            keep.append(raw)
    return b'\r\n'.join(keep)
|
||||
|
||||
def strip_html(html):
    """Reduce an HTML fragment to plain text (tags and whitespace collapsed)."""
    # (fix: removed redundant function-local `import re`; re is already
    # imported at the top of the file)
    # <style>/<script> bodies are not visible text.
    html = re.sub(r'<(style|script)[^>]*>[^<]*</\1>', ' ', html, flags=re.I|re.S)
    # Closing block-level tags act as line breaks.
    html = re.sub(r'</(p|div|h[1-6]|li|tr|br)>', '\n', html, flags=re.I)
    html = re.sub(r'<[^>]+>', ' ', html)
    return re.sub(r'\s+', ' ', html).strip()
|
||||
|
||||
def parse_email(msg):
    """Return a best-effort plain-text body for an email message."""
    if not msg.is_multipart():
        payload = msg.get_payload(decode=True)
        return payload.decode('utf-8', errors='ignore') if payload else ''

    text = ''
    for part in msg.walk():
        payload = part.get_payload(decode=True)
        if not payload:
            continue
        kind = part.get_content_type()
        if kind == 'text/plain':
            text += payload.decode('utf-8', errors='ignore')
        elif kind == 'text/html':
            # HTML parts are flattened to text before appending.
            text += strip_html(payload.decode('utf-8', errors='ignore'))
    return text
|
||||
|
||||
def detect_carrier(from_email, body, subject):
    """Guess the carrier id from the sender domain, else from message text."""
    sender_domain = from_email.split('@')[-1].lower() if '@' in from_email else ''
    haystack = f"{subject} {body}".lower()
    for cid, meta in CARRIERS.items():
        # Domain match first, then carrier id/name anywhere in the text.
        if any(dom in sender_domain for dom in meta['domains']):
            return cid
        if cid in haystack or meta['name'].lower() in haystack:
            return cid
    return None
|
||||
|
||||
def extract_tracking(body, carrier_id):
    """Find a tracking number in *body*; returns (number, carrier_id).

    With an unknown carrier, all patterns are probed in dict order and
    the first hit also fixes the carrier.
    """
    if carrier_id in CARRIERS:
        for pat in CARRIERS[carrier_id]['patterns']:
            hit = re.search(pat, body, re.I)
            if hit:
                return hit.group(0), carrier_id
        return None, carrier_id

    for cid, meta in CARRIERS.items():
        for pat in meta['patterns']:
            hit = re.search(pat, body, re.I)
            if hit:
                return hit.group(0), cid
    return None, None
|
||||
|
||||
def extract_delivery(body):
    """Return the first estimated-delivery phrase found in *body*, or None."""
    patterns = [
        # "arriving/delivery ... [weekday,] Month DD, YYYY"
        r'(?:arriving|delivery|delivered by|estimated delivery)[\s:]*(?:on|by)?\s*(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)?[,\s]*([a-z]+)\s+(\d{1,2})[,\s]+(\d{4})',
        # "arrives/by Month DD"
        r'(?:arrives|delivery on|by)\s+(\w+)\s+(\d{1,2})',
    ]
    lowered = body.lower()
    for pattern in patterns:
        found = re.search(pattern, lowered)
        if found:
            return found.group(0).strip()
    return None
|
||||
|
||||
def get_tracking_url(carrier, num):
    """Return the carrier's public tracking URL for *num*, or None."""
    templates = {
        'ups': 'https://www.ups.com/track?tracknum={0}',
        'fedex': 'https://www.fedex.com/fedextrack/?trknbr={0}',
        'usps': 'https://tools.usps.com/go/TrackConfirmAction?tLabels={0}',
        'dhl': 'https://www.dhl.com/en/express/tracking.html?AWB={0}',
        'amazon': 'https://track.amazon.com/tracking/{0}'
    }
    template = templates.get(carrier)
    return template.format(num) if template else None
|
||||
|
||||
def process_message(sock, msg_num, conn):
    """Fetch one message, extract shipment details, and persist them.

    Returns a shipment dict, or None when the message is unparseable,
    already recorded, or contains no tracking number.
    """
    sock.send(f'A4 FETCH {msg_num} RFC822\r\n'.encode())
    resp = get_response(sock, "A4 OK")
    email_data = extract_rfc822(resp)
    if not email_data:
        return None

    try:
        msg = BytesParser(policy=policy.default).parsebytes(email_data)
    except Exception:
        # (fix: bare `except:` narrowed to Exception so KeyboardInterrupt/
        # SystemExit are not swallowed) Malformed message: skip it.
        return None

    message_id = msg.get('Message-ID', '').strip('<>')
    if not message_id:
        return None

    # Dedupe against previously recorded messages.
    cursor = conn.execute('SELECT 1 FROM shipments WHERE message_id = ?', (message_id,))
    if cursor.fetchone():
        return None

    sender_email = msg.get('From', '')
    sender_name = sender_email.split('<')[0].strip() if '<' in sender_email else sender_email
    subject = msg.get('Subject', '(No Subject)')
    body = parse_email(msg)

    carrier_id = detect_carrier(sender_email, body, subject)
    tracking_num, carrier_id = extract_tracking(body, carrier_id)
    if not tracking_num:
        return None

    est_delivery = extract_delivery(body)
    tracking_url = get_tracking_url(carrier_id, tracking_num)
    carrier_name = CARRIERS.get(carrier_id, {}).get('name', carrier_id.upper())

    conn.execute('''INSERT INTO shipments (message_id, sender, carrier, tracking_number,
                    estimated_delivery, tracking_url, received_at, notified_at)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)''',
                 (message_id, sender_name, carrier_name, tracking_num, est_delivery,
                  tracking_url, datetime.now().isoformat(), datetime.now().isoformat()))
    conn.commit()

    return {
        'sender': sender_name, 'carrier': carrier_name, 'tracking': tracking_num,
        'estimated': est_delivery or 'Unknown', 'url': tracking_url
    }
|
||||
|
||||
def format_notification(s):
    """Render a shipment dict as a Discord-markdown notification string."""
    out = [
        f"📦 **New Shipment Detected**",
        f"",
        f"**From:** {s['sender']}",
        f"**Carrier:** {s['carrier']}",
        f"**Tracking:** `{s['tracking']}`",
        f"**Est. Delivery:** {s['estimated']}",
    ]
    if s['url']:
        out.append(f"**Track:** <{s['url']}>")
    return '\n'.join(out)
|
||||
|
||||
def main():
    """Poll the IMAP inbox for unread mail, record shipments, print notices.

    Prints one formatted notification per new shipment (separated by
    "---") for the consumer of this script's stdout, and returns the
    list of shipment dicts.
    """
    init_db()
    conn = sqlite3.connect(DB_PATH)
    shipments = []

    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(15)
        sock.connect((IMAP_HOST, IMAP_PORT))
        sock.recv(1024)  # discard the server greeting

        sock.send(f'A1 LOGIN "{USERNAME}" "{PASSWORD}"\r\n'.encode())
        get_response(sock, "A1 OK")

        sock.send(b'A2 SELECT "INBOX"\r\n')
        get_response(sock, "A2 OK")

        sock.send(b'A3 SEARCH UNSEEN\r\n')
        resp = get_response(sock, "A3 OK").decode()

        # Untagged reply is "* SEARCH n1 n2 ..."; collect the numbers.
        msg_nums = []
        for line in resp.split('\r\n'):
            if 'SEARCH' in line and '*' in line:
                parts = line.split('SEARCH')
                if len(parts) > 1:
                    msg_nums = [n for n in parts[1].strip().split() if n.isdigit()]

        for msg_num in msg_nums:
            s = process_message(sock, msg_num, conn)
            if s: shipments.append(s)

        sock.send(b'A5 LOGOUT\r\n')
        sock.close()

    except Exception as e:
        # NOTE(review): on failure the socket is not closed here --
        # consider closing it in this path (or a finally) as well.
        print(f"Error: {e}")
    finally:
        conn.close()

    # Output for Discord
    if shipments:
        for s in shipments:
            print(format_notification(s))
            print("---")
    else:
        print("No new shipments found.")

    return shipments
|
||||
|
||||
if __name__ == "__main__":
    # Cron entry point: run one inbox check and print the results.
    main()
|
||||
152
tools/youtube-summarizer.py
Normal file
152
tools/youtube-summarizer.py
Normal file
@@ -0,0 +1,152 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
YouTube Video Summarizer
|
||||
Extracts transcripts and generates bullet summaries with timestamps
|
||||
"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
import json
|
||||
import subprocess
|
||||
import tempfile
|
||||
import os
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
def extract_video_id(url):
    """Return the 11-character YouTube video ID from *url*, or None.

    Handles watch, youtu.be, embed, and shorts URL forms.
    """
    id_patterns = (
        r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})',
        r'youtube\.com/watch\?.*v=([a-zA-Z0-9_-]{11})',
        r'youtube\.com/shorts/([a-zA-Z0-9_-]{11})',
    )
    for pat in id_patterns:
        found = re.search(pat, url)
        if found:
            return found.group(1)
    return None
|
||||
|
||||
def get_transcript_yt_dlp(video_id):
    """Download auto-generated English subtitles for *video_id* via yt-dlp.

    Returns the subtitle file contents as text, None when no subtitle
    file was produced, or an "Error: ..." string on failure -- callers
    distinguish these three cases.
    """
    try:
        # Use system temp directory (cross-platform).
        temp_dir = tempfile.gettempdir()
        base_path = os.path.join(temp_dir, f'yt_{video_id}')

        # (fix: the CompletedProcess was bound to an unused local; the
        # call result is determined by the files yt-dlp writes, not the
        # return value)
        subprocess.run(
            ['yt-dlp', '--write-auto-sub', '--skip-download',
             '--sub-langs', 'en,en-en', '--convert-subs', 'srt',
             '-o', base_path, f'https://www.youtube.com/watch?v={video_id}'],
            capture_output=True,
            text=True,
            timeout=60
        )

        # yt-dlp's output naming varies; probe the plausible candidates.
        sub_files = [
            f'{base_path}.en.srt',
            f'{base_path}.en.vtt',
            f'{base_path}.en-en.srt',
            f'{base_path}.en-en.vtt'
        ]
        for sub_file in sub_files:
            try:
                with open(sub_file, 'r', encoding='utf-8') as f:
                    return f.read()
            except FileNotFoundError:
                continue
        return None
    except Exception as e:
        # Covers subprocess timeouts and a missing yt-dlp binary; the
        # error-string return is part of the caller-facing contract.
        return f"Error: {e}"
|
||||
|
||||
def parse_srt(srt_content):
    """Parse SRT text into a list of {'time': start, 'text': caption} dicts.

    Returns [] for empty/None input. Start times are normalized from
    "HH:MM:SS,mmm" to "HH:MM:SS.mmm".
    """
    if not srt_content:
        return []

    parsed = []
    for block in srt_content.strip().split('\n\n'):
        lines = block.split('\n')
        if len(lines) < 3:
            continue
        # lines[0] is the cue index, lines[1] the timestamp range,
        # lines[2:] the caption text.
        start = lines[1].split(' --> ')[0].replace(',', '.')
        parsed.append({'time': start, 'text': ' '.join(lines[2:])})
    return parsed
|
||||
|
||||
def chunk_and_summarize(entries, chunk_size=2000):
    """Format parsed transcript entries into a markdown summary.

    Samples roughly 10 evenly spaced entries as timestamped key points
    and appends the first 2000 characters of the joined transcript.
    NOTE(review): chunk_size is part of the signature but unused -- the
    2000-character cap below is hard-coded.
    """
    if not entries:
        return "No transcript available for this video."

    full_text = ' '.join(e['text'] for e in entries)
    step = max(1, len(entries) // 10)
    sampled = entries[::step]

    lines = [
        "## Transcript Summary",
        "",
        f"**Total entries:** {len(entries)}",
        f"**Duration:** ~{entries[-1]['time'][:5] if entries else 'Unknown'}",
        "",
        "### Key Points with Timestamps",
        "",
    ]
    for entry in sampled[:15]:  # cap at 15 sampled points
        stamp = entry['time'][:5]       # HH:MM prefix of the start time
        snippet = entry['text'][:100]   # first 100 chars of the caption
        lines.append(f"- **{stamp}** -- {snippet}...")

    lines.append("")
    lines.append("### Full Context (first 2000 chars)")
    lines.append(full_text[:2000] + "..." if len(full_text) > 2000 else full_text)
    return "\n".join(lines)
|
||||
|
||||
def main():
    """CLI entry point: print a transcript summary for the given YouTube URL."""
    # Fix Windows encoding issues
    import sys
    sys.stdout.reconfigure(encoding='utf-8')
    if len(sys.argv) < 2:
        print("Usage: youtube-summarizer.py <youtube_url>")
        sys.exit(1)

    url = sys.argv[1]
    video_id = extract_video_id(url)

    if not video_id:
        print(f"ERROR: Could not extract video ID from: {url}")
        print("Supported formats: youtube.com/watch?v=ID, youtu.be/ID, youtube.com/shorts/ID")
        sys.exit(1)

    print(f"Processing video: {video_id}")

    srt_content = get_transcript_yt_dlp(video_id)

    # get_transcript_yt_dlp has three outcomes: subtitle text, None
    # (no captions found), or an "Error: ..." string.
    if srt_content and not srt_content.startswith("Error"):
        entries = parse_srt(srt_content)
        summary = chunk_and_summarize(entries)
        print(summary)
    elif srt_content is None:
        print(f"No transcript available for video: {video_id}")
        print("This video may not have auto-generated captions, or they may be restricted.")
        print("Try a different video with visible captions enabled.")
    else:
        print(f"ERROR: Failed to download transcript")
        print(f"Details: {srt_content}")
||||
if __name__ == '__main__':
    # Script entry point.
    main()
|
||||
Reference in New Issue
Block a user