add all files

This commit is contained in:
Rucus
2026-02-17 09:29:34 -06:00
parent b8c8d67c67
commit 782d203799
21925 changed files with 2433086 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""Utilities for extracting database context artifacts for the SQL agent."""

View File

@@ -0,0 +1,5 @@
"""Package entry point: running the package as a module dispatches to the CLI."""
from .cli import main

# Delegate straight to the CLI entry point when executed via ``python -m``.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,46 @@
"""Command-line interface for database context extraction jobs."""
from __future__ import annotations
import argparse
from pathlib import Path
from . import schema_snapshot
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse command-line arguments for the extraction CLI.

    Args:
        argv: Optional explicit argument list to parse instead of
            ``sys.argv[1:]``. The default of ``None`` preserves the original
            CLI behavior while making the parser unit-testable.

    Returns:
        Namespace with ``schema``, ``output`` and ``job`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Extract schema and contextual artifacts for the SQL agent.",
    )
    parser.add_argument(
        "--schema",
        default=None,
        help="Optional SQL Server schema to filter on (defaults to all schemas).",
    )
    parser.add_argument(
        "--output",
        default="db_agent/context/schema.json",
        help="Path to write the schema JSON document.",
    )
    parser.add_argument(
        "--job",
        choices=["schema"],
        default="schema",
        help="Extraction job to run (currently only 'schema').",
    )
    return parser.parse_args(argv)
def main() -> None:
    """CLI entry point: run the selected extraction job and report the output path."""
    args = parse_args()
    destination = Path(args.output)
    # Guard clause: reject unknown jobs up front (argparse already restricts
    # the choices, so this is a defensive backstop).
    if args.job != "schema":
        raise RuntimeError(f"Unsupported job: {args.job}")
    written = schema_snapshot.run(schema=args.schema, output=destination)
    print(f"Schema document written to {written}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,175 @@
"""Extract schema metadata from the SugarScale MSSQL database."""
from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List
import sqlalchemy as sa
from sqlalchemy.engine import Engine
from .settings import DbSettings
@dataclass
class TableColumn:
    """Metadata for one column, mirroring a row of INFORMATION_SCHEMA.COLUMNS."""

    # Schema that owns the table (TABLE_SCHEMA).
    table_schema: str
    # Table the column belongs to (TABLE_NAME).
    table_name: str
    # Column identifier (COLUMN_NAME).
    column_name: str
    # SQL Server type name as reported by DATA_TYPE.
    data_type: str
    # True when IS_NULLABLE was reported as the string "YES".
    is_nullable: bool
    # Textual default expression, or None when the column has no default.
    column_default: str | None
    # CHARACTER_MAXIMUM_LENGTH; None for types where it does not apply.
    character_maximum_length: int | None
@dataclass
class ForeignKeyRelation:
    """One column-level foreign-key edge, from sys.foreign_key_columns."""

    # Referencing (child) side of the relationship.
    source_schema: str
    source_table: str
    source_column: str
    # Referenced (parent) side of the relationship.
    target_schema: str
    target_table: str
    target_column: str
def create_engine(settings: DbSettings) -> Engine:
    """Build a SQLAlchemy engine for the configured MSSQL database.

    The raw ODBC connection string is URL-encoded and handed to pyodbc
    verbatim through the ``odbc_connect`` query parameter.
    """
    # Use the public stdlib quote_plus rather than sqlalchemy.engine.url's
    # private re-export, which is an internal detail and not a stable API.
    from urllib.parse import quote_plus

    connection_uri = (
        "mssql+pyodbc:///?odbc_connect=" + quote_plus(settings.connection_string())
    )
    # fast_executemany speeds up bulk parameter binding with the pyodbc driver.
    return sa.create_engine(connection_uri, fast_executemany=True)
def fetch_columns(engine: Engine, schema: str | None = None) -> List[TableColumn]:
    """Return one TableColumn per column, optionally filtered to a single schema."""
    stmt = sa.text(
        """
        SELECT
            TABLE_SCHEMA,
            TABLE_NAME,
            COLUMN_NAME,
            DATA_TYPE,
            IS_NULLABLE,
            COLUMN_DEFAULT,
            CHARACTER_MAXIMUM_LENGTH
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE (:schema IS NULL OR TABLE_SCHEMA = :schema)
        ORDER BY TABLE_SCHEMA, TABLE_NAME, ORDINAL_POSITION
        """
    )
    with engine.connect() as conn:
        rows = conn.execute(stmt, {"schema": schema})
        # INFORMATION_SCHEMA reports nullability as the string "YES"/"NO".
        return [
            TableColumn(
                table_schema=row.TABLE_SCHEMA,
                table_name=row.TABLE_NAME,
                column_name=row.COLUMN_NAME,
                data_type=row.DATA_TYPE,
                is_nullable=(row.IS_NULLABLE == "YES"),
                column_default=row.COLUMN_DEFAULT,
                character_maximum_length=row.CHARACTER_MAXIMUM_LENGTH,
            )
            for row in rows
        ]
def fetch_foreign_keys(engine: Engine, schema: str | None = None) -> List[ForeignKeyRelation]:
    """Return one ForeignKeyRelation per FK column pair, optionally filtered by schema."""
    stmt = sa.text(
        """
        SELECT
            fk_tab.name AS source_table,
            fk_col.name AS source_column,
            fk_sch.name AS source_schema,
            pk_tab.name AS target_table,
            pk_col.name AS target_column,
            pk_sch.name AS target_schema
        FROM sys.foreign_key_columns fkc
        INNER JOIN sys.tables fk_tab ON fkc.parent_object_id = fk_tab.object_id
        INNER JOIN sys.schemas fk_sch ON fk_tab.schema_id = fk_sch.schema_id
        INNER JOIN sys.columns fk_col ON fkc.parent_object_id = fk_col.object_id AND fkc.parent_column_id = fk_col.column_id
        INNER JOIN sys.tables pk_tab ON fkc.referenced_object_id = pk_tab.object_id
        INNER JOIN sys.schemas pk_sch ON pk_tab.schema_id = pk_sch.schema_id
        INNER JOIN sys.columns pk_col ON fkc.referenced_object_id = pk_col.object_id AND fkc.referenced_column_id = pk_col.column_id
        WHERE (:schema IS NULL OR fk_sch.name = :schema)
        ORDER BY fk_sch.name, fk_tab.name
        """
    )
    with engine.connect() as conn:
        rows = conn.execute(stmt, {"schema": schema})
        return [
            ForeignKeyRelation(
                source_schema=row.source_schema,
                source_table=row.source_table,
                source_column=row.source_column,
                target_schema=row.target_schema,
                target_table=row.target_table,
                target_column=row.target_column,
            )
            for row in rows
        ]
def assemble_schema_document(columns: Iterable[TableColumn], relations: Iterable[ForeignKeyRelation]) -> Dict[str, Any]:
    """Merge column and foreign-key metadata into one JSON-serializable document.

    Tables are keyed as ``"<schema>.<table>"``; each entry carries its column
    list and outgoing relationships. A table mentioned only as an FK source
    still gets an entry (with an empty column list).
    """
    tables: Dict[str, Any] = {}

    def entry_for(schema_name: str, table_name: str) -> Dict[str, Any]:
        # Fetch-or-create the per-table entry, preserving insertion order.
        key = f"{schema_name}.{table_name}"
        if key not in tables:
            tables[key] = {
                "schema": schema_name,
                "name": table_name,
                "columns": [],
                "relationships": [],
            }
        return tables[key]

    for column in columns:
        entry_for(column.table_schema, column.table_name)["columns"].append(
            {
                "name": column.column_name,
                "type": column.data_type,
                "nullable": column.is_nullable,
                "default": column.column_default,
                "max_length": column.character_maximum_length,
            }
        )

    for relation in relations:
        entry_for(relation.source_schema, relation.source_table)["relationships"].append(
            {
                "target": f"{relation.target_schema}.{relation.target_table}",
                "source_column": relation.source_column,
                "target_column": relation.target_column,
            }
        )

    return {"tables": tables}
def write_schema_json(output_path: Path, schema_doc: Dict[str, Any]) -> None:
    """Serialize the schema document to *output_path* as pretty-printed JSON.

    Parent directories are created on demand; keys are sorted for stable diffs.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(schema_doc, indent=2, sort_keys=True)
    output_path.write_text(serialized, encoding="utf-8")
def run(schema: str | None = None, output: Path | None = None) -> Path:
    """Execute the full snapshot pipeline and return the written file's path.

    Connection settings come from the environment (see DbSettings.from_env);
    *output* falls back to the default context location when not supplied.
    """
    engine = create_engine(DbSettings.from_env())
    document = assemble_schema_document(
        fetch_columns(engine, schema=schema),
        fetch_foreign_keys(engine, schema=schema),
    )
    target = output or Path("db_agent/context/schema.json")
    write_schema_json(target, document)
    return target

View File

@@ -0,0 +1,81 @@
"""Configuration helpers for connecting to the MSSQL sandbox."""
from __future__ import annotations
import os
from dataclasses import dataclass
@dataclass(frozen=True)
class DbSettings:
    """Environment-driven connection settings for SQL Server.

    Required environment variables:
        DB_SERVER: hostname (and optional instance/port) of the SQL Server.
        DB_DATABASE: target database name (e.g. "SugarScale_Lasuca").
        DB_USERNAME: database login.
        DB_PASSWORD: database password.

    Optional overrides:
        DB_DRIVER: ODBC driver name (defaults to "ODBC Driver 18 for SQL Server").
        DB_ENCRYPT: "true" to enable TLS (default: "true").
        DB_TRUST_CERT: "true" to trust the server certificate (default: "true").
    """

    server: str
    database: str
    username: str
    password: str
    driver: str = "ODBC Driver 18 for SQL Server"
    encrypt: bool = True
    trust_server_certificate: bool = True

    @classmethod
    def from_env(cls) -> "DbSettings":
        """Build settings from environment variables.

        Raises:
            RuntimeError: if any required variable is unset or empty. The
                message lists every missing name at once so operators can
                fix them in a single pass.
        """
        required = ("DB_SERVER", "DB_DATABASE", "DB_USERNAME", "DB_PASSWORD")
        values = {name: os.getenv(name) for name in required}
        missing = [name for name, value in values.items() if not value]
        if missing:
            raise RuntimeError(
                "Missing required environment variables: " + ", ".join(missing)
            )
        # cls.driver is the dataclass field default, keeping the fallback in
        # one place instead of duplicating the driver string literal here.
        driver = os.getenv("DB_DRIVER", cls.driver)
        encrypt = os.getenv("DB_ENCRYPT", "true").lower() == "true"
        trust = os.getenv("DB_TRUST_CERT", "true").lower() == "true"
        return cls(
            server=values["DB_SERVER"],
            database=values["DB_DATABASE"],
            username=values["DB_USERNAME"],
            password=values["DB_PASSWORD"],
            driver=driver,
            encrypt=encrypt,
            trust_server_certificate=trust,
        )

    def connection_string(self) -> str:
        """Render the raw ODBC connection string for pyodbc.

        NOTE: the result embeds the plaintext password — never log it.
        """
        parts = [
            f"DRIVER={{{self.driver}}}",
            f"SERVER={self.server}",
            f"DATABASE={self.database}",
            f"UID={self.username}",
            f"PWD={self.password}",
        ]
        if self.encrypt:
            parts.append("Encrypt=yes")
        if self.trust_server_certificate:
            parts.append("TrustServerCertificate=yes")
        # Allow multiple active result sets on one connection.
        parts.append("MARS_Connection=yes")
        return ";".join(parts)