class SchemaManager:
"""Manages database schema for spell checker dictionaries.
Provides centralized definitions for all tables and indexes,
ensuring consistency across dictionary builds.
Args:
conn: SQLite database connection
cursor: SQLite database cursor
console: Optional PipelineConsole for output
"""
# Table definitions.
# Maps each table name to its CREATE TABLE statement. Every statement is
# assembled the same way: a leading newline, the CREATE header, the table's
# column / constraint clauses joined by ",\n", and a closing ")\n".
# Dict order follows the order of the specs below.
# NOTE(review): the word*_id columns of bigrams/trigrams are nullable, and
# SQLite treats NULLs as distinct inside UNIQUE constraints — confirm that
# NULL ids are never inserted.
TABLES = {
    table_name: (
        "\nCREATE TABLE IF NOT EXISTS " + table_name + " (\n"
        + ",\n".join(clauses)
        + "\n)\n"
    )
    for table_name, clauses in (
        (
            "syllables",
            (
                "id INTEGER PRIMARY KEY AUTOINCREMENT",
                "syllable TEXT UNIQUE NOT NULL",
                "frequency INTEGER DEFAULT 0",
            ),
        ),
        (
            "words",
            (
                "id INTEGER PRIMARY KEY AUTOINCREMENT",
                "word TEXT UNIQUE NOT NULL",
                "syllable_count INTEGER",
                "frequency INTEGER DEFAULT 0",
                "pos_tag TEXT",
                "is_curated INTEGER DEFAULT 0",
                "inferred_pos TEXT",
                "inferred_confidence REAL",
                "inferred_source TEXT",
            ),
        ),
        (
            "bigrams",
            (
                "id INTEGER PRIMARY KEY AUTOINCREMENT",
                "word1_id INTEGER",
                "word2_id INTEGER",
                "probability REAL DEFAULT 0.0",
                "count INTEGER DEFAULT 0",
                "FOREIGN KEY(word1_id) REFERENCES words(id)",
                "FOREIGN KEY(word2_id) REFERENCES words(id)",
                "UNIQUE(word1_id, word2_id)",
            ),
        ),
        (
            "trigrams",
            (
                "id INTEGER PRIMARY KEY AUTOINCREMENT",
                "word1_id INTEGER",
                "word2_id INTEGER",
                "word3_id INTEGER",
                "probability REAL DEFAULT 0.0",
                "count INTEGER DEFAULT 0",
                "FOREIGN KEY(word1_id) REFERENCES words(id)",
                "FOREIGN KEY(word2_id) REFERENCES words(id)",
                "FOREIGN KEY(word3_id) REFERENCES words(id)",
                "UNIQUE(word1_id, word2_id, word3_id)",
            ),
        ),
        (
            "pos_unigrams",
            (
                "pos TEXT UNIQUE NOT NULL",
                "probability REAL DEFAULT 0.0",
            ),
        ),
        (
            "pos_bigrams",
            (
                "pos1 TEXT NOT NULL",
                "pos2 TEXT NOT NULL",
                "probability REAL DEFAULT 0.0",
                "UNIQUE(pos1, pos2)",
            ),
        ),
        (
            "pos_trigrams",
            (
                "pos1 TEXT NOT NULL",
                "pos2 TEXT NOT NULL",
                "pos3 TEXT NOT NULL",
                "probability REAL DEFAULT 0.0",
                "UNIQUE(pos1, pos2, pos3)",
            ),
        ),
        (
            "processed_files",
            (
                "path TEXT PRIMARY KEY",
                "mtime REAL",
                "size INTEGER",
            ),
        ),
    )
}
# Index definitions for query optimization.
# Keys are index names; values are the matching CREATE INDEX statements,
# rendered from (index name, table, column list) specs so each name is
# spelled out only once.
# NOTE(review): every column set below is also declared UNIQUE in TABLES,
# and SQLite backs UNIQUE constraints with an automatic index, so these
# explicit indexes look redundant — confirm before dropping them.
INDEXES = {
    index_name: "CREATE INDEX IF NOT EXISTS {} ON {}({})".format(
        index_name, table, columns
    )
    for index_name, table, columns in (
        ("idx_syllables_text", "syllables", "syllable"),
        ("idx_words_text", "words", "word"),
        ("idx_bigrams_w1_w2", "bigrams", "word1_id, word2_id"),
        ("idx_trigrams_w1_w2_w3", "trigrams", "word1_id, word2_id, word3_id"),
    )
}