Fresh start - excluded large ROM JSON files
This commit is contained in:
103
scripts/extract_memory.py
Normal file
103
scripts/extract_memory.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""Extract structured entries from a daily markdown memory file into SQLite.

Reads the previous day's ``YYYY-MM-DD.md`` memory note, splits it into
``## ``-headed sections, classifies each section by heading keywords, and
records the results in the ``memory_entries``, ``extraction_log`` and
``daily_summary`` tables of the workspace database.
"""
import re
import sqlite3
from datetime import date, datetime, timedelta
from pathlib import Path

# Fixed locations used by the openclaw workspace on this machine.
BASE_DIR = Path(r'C:\Users\admin\.openclaw')
DB_PATH = BASE_DIR / 'memory.db'
MEMORY_DIR = BASE_DIR / 'workspace' / 'memory'

# (entry_type, heading keywords), checked in priority order — first match wins.
_TYPE_RULES = [
    ('bug', ('bug', 'issue', 'error', 'problem', 'failed')),
    ('action_item', ('todo', 'action', 'task', 'next')),
    ('decision', ('decision', 'decided', 'chose', 'selected')),
    ('system_status', ('system', 'status', 'check', 'cron')),
    ('log', ('morning', 'evening', 'afternoon')),
]

# (tag, body keywords): the tag is applied when any keyword appears in the body.
_TAG_RULES = [
    ('ha', ('ha ', 'home assistant')),
    ('cron', ('cron',)),
    ('discord', ('discord',)),
    ('memory', ('memory',)),
]


def _classify(heading):
    """Return the entry type for a section heading (default: "note")."""
    lowered = heading.lower()  # lower once, not once per keyword test
    for entry_type, keywords in _TYPE_RULES:
        if any(k in lowered for k in keywords):
            return entry_type
    return 'note'


def _extract_tags(body):
    """Return a comma-joined tag string for a section body, or None."""
    lowered = body.lower()
    tags = [tag for tag, keywords in _TAG_RULES
            if any(k in lowered for k in keywords)]
    return ','.join(tags) if tags else None


def _parse_entries(content, source_date):
    """Split markdown *content* on '## ' headers and build entry dicts."""
    entries = []
    sections = re.split(r'\n## ', content)
    print(f"Found {len(sections)} sections")
    for section in sections:
        section = section.strip()
        if not section:
            continue
        lines = section.split('\n')
        heading = lines[0].replace('#', '').strip()
        body = '\n'.join(lines[1:]).strip()
        entry_type = _classify(heading)
        entries.append({
            'source_date': source_date,
            'entry_type': entry_type,
            'title': heading[:200],                    # column width limits
            'content': body[:2000] if body else None,
            'tags': _extract_tags(body),
        })
        print(f" Extracted: {entry_type} - {heading[:60]}")
    return entries


def main(source_date=None):
    """Process one day's memory file into the database.

    Defaults to yesterday's date (the original script hard-coded it,
    contradicting its own "yesterday" comment).
    """
    if source_date is None:
        source_date = (date.today() - timedelta(days=1)).isoformat()

    with open(MEMORY_DIR / f"{source_date}.md", 'r', encoding='utf-8') as f:
        content = f.read()

    print(f"=== Processing {source_date} ===")
    print(f"File size: {len(content)} characters")
    print(f"Content preview:\n{content[:500]}\n")

    entries = _parse_entries(content, source_date)

    conn = sqlite3.connect(DB_PATH)
    try:  # ensure the connection is released even if a statement raises
        cursor = conn.cursor()
        cursor.executemany("""
        INSERT INTO memory_entries (source_date, entry_type, title, content, tags)
        VALUES (?, ?, ?, ?, ?)
        """, [(e['source_date'], e['entry_type'], e['title'],
               e['content'], e['tags']) for e in entries])
        inserted_count = len(entries)

        # Record the extraction run itself.
        cursor.execute("""
        INSERT INTO extraction_log (date, items_extracted, errors)
        VALUES (?, ?, ?)
        """, (source_date, inserted_count,
              None if inserted_count > 0 else "Minimal content to extract"))

        # Refresh the per-day rollup (OR REPLACE keeps reruns idempotent).
        # NOTE: 'note' and 'log' entries both count toward 'other', as in the
        # original exclusion list.
        cursor.execute("""
        INSERT OR REPLACE INTO daily_summary
        (source_date, total_entries, system_status, other, processed_at)
        VALUES (?, ?, ?, ?, ?)
        """, (source_date, inserted_count,
              sum(1 for e in entries if e['entry_type'] == 'system_status'),
              sum(1 for e in entries
                  if e['entry_type'] not in ['bug', 'action_item', 'decision', 'system_status']),
              datetime.now().isoformat()))

        conn.commit()
    finally:
        conn.close()

    print("\n=== Summary ===")
    print(f"Entries extracted: {inserted_count}")
    print(f"Written to database: {DB_PATH}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user