Documentation Index
Fetch the complete documentation index at: https://docs.myspellchecker.com/llms.txt
Use this file to discover all available pages before exploring further.
When a word fails validation, mySpellChecker generates correction candidates. The SuggestionStrategy protocol lets you swap the algorithm (SymSpell, phonetic matching, or a custom approach) without changing the rest of the pipeline.
Overview
from myspellchecker.algorithms.suggestion_strategy import (
    SuggestionStrategy,
    SuggestionContext,
    SuggestionResult,
    BaseSuggestionStrategy,
    CompositeSuggestionStrategy,
)
# UnifiedRanker lives in the ranker module, not in suggestion_strategy
from myspellchecker.algorithms.ranker import UnifiedRanker

# Create a composite strategy combining multiple approaches.
# symspell_strategy and context_strategy are strategy instances built
# beforehand; their construction is not part of this example.
composite = CompositeSuggestionStrategy(
    strategies=[symspell_strategy, context_strategy],
    ranker=UnifiedRanker(),
)

# prev_words carries the left context of the term being corrected
context = SuggestionContext(prev_words=["မြန်မာ"])
result = composite.suggest("နိင်ငံ", context)
print(result.terms)  # ["နိုင်ငံ", ...]
SuggestionStrategy Protocol
The core protocol defining the suggestion interface:
from typing import List, Optional, Protocol, Sequence, runtime_checkable


@runtime_checkable
class SuggestionStrategy(Protocol):
    """Protocol for suggestion generation strategies.

    Any object that provides these four members satisfies the protocol
    structurally; no inheritance is required. Because the protocol is
    ``@runtime_checkable``, ``isinstance(obj, SuggestionStrategy)`` works,
    but it only checks that the members exist, not their signatures.
    """

    # SuggestionContext and SuggestionResult are quoted because they are
    # forward references in this listing.

    @property
    def name(self) -> str:
        """Return the strategy name for identification."""
        ...

    def suggest(
        self,
        term: str,
        context: Optional["SuggestionContext"] = None,
    ) -> "SuggestionResult":
        """Generate suggestions for a term."""
        ...

    def suggest_batch(
        self,
        terms: Sequence[str],
        contexts: Optional[Sequence["SuggestionContext"]] = None,
    ) -> List["SuggestionResult"]:
        """Generate suggestions for multiple terms."""
        ...

    def supports_context(self) -> bool:
        """Check if strategy uses contextual information."""
        ...
SuggestionContext
Context information for generating suggestions:
@dataclass
class SuggestionContext:
    """Context information for generating suggestions.

    Every field has a default, so ``SuggestionContext()`` is a valid,
    empty context. The list fields use ``default_factory`` so that each
    instance gets its own list rather than sharing one mutable default.
    """

    prev_words: List[str] = field(default_factory=list)  # Left context
    next_words: List[str] = field(default_factory=list)  # Right context
    sentence: Optional[str] = None  # Full sentence
    position: int = 0  # Position in sentence
    max_suggestions: int = 5  # Max results
    max_edit_distance: int = 2  # Max edits
    include_self: bool = False  # Include input if valid
Creating Context
# Simple lookups: rely on the defaults of every field
context = SuggestionContext()

# Fully specified context: neighbouring words, the whole sentence,
# the position of the term, and explicit result limits
context = SuggestionContext(
    prev_words=["မြန်မာ"],
    next_words=["သည်"],
    sentence="မြန်မာ နိင်ငံ သည်",
    position=1,
    max_suggestions=5,
    max_edit_distance=2,
)
SuggestionResult
Results from suggestion generation:
@dataclass
class SuggestionResult:
    """Result of a suggestion generation operation."""

    # NOTE: only the fields are listed here. The usage example on this page
    # also relies on truthiness, len(), and the `best` / `terms` accessors,
    # which are not shown in this listing.

    suggestions: List[SuggestionData]  # Ranked suggestions
    strategy_name: str  # Strategy identifier
    metadata: dict = field(default_factory=dict)  # Fresh dict per instance
    is_truncated: bool = False  # True if results limited
Accessing Results
result = strategy.suggest("typo", context)

# Check if suggestions found
if result:
    print(f"Found {len(result)} suggestions")

# Get best suggestion; guard before reading its attributes
best = result.best
if best:
    print(f"Best: {best.term} (confidence: {best.confidence})")

# Get just the terms
terms = result.terms  # ["suggestion1", "suggestion2", ...]

# Access metadata
print(result.strategy_name)  # "symspell"
print(result.is_truncated)  # True if more suggestions available
BaseSuggestionStrategy
Base class with common functionality:
class BaseSuggestionStrategy:
    """Base class for suggestion strategies.

    Stores the default limits, reports the name "base", and declares no
    context support. Subclasses override ``name`` and, where relevant,
    ``supports_context``.
    """

    def __init__(self, max_suggestions: int = 5, max_edit_distance: int = 2):
        self._max_suggestions = max_suggestions
        self._max_edit_distance = max_edit_distance

    @property
    def name(self) -> str:
        return "base"

    def supports_context(self) -> bool:
        return False

    def _create_result(
        self,
        suggestions: List[SuggestionData],
        max_suggestions: Optional[int] = None,
        metadata: Optional[dict] = None,
    ) -> SuggestionResult:
        """Create result with truncation handling."""
        ...
Implementing a Custom Strategy
class PhoneticStrategy(BaseSuggestionStrategy):
    """Phonetic similarity-based suggestions."""

    def __init__(self, phonetic_hasher, provider, **kwargs):
        # Remaining keyword arguments are forwarded to the base class
        super().__init__(**kwargs)
        self.hasher = phonetic_hasher
        self.provider = provider

    @property
    def name(self) -> str:
        return "phonetic"

    def suggest(
        self,
        term: str,
        context: Optional[SuggestionContext] = None,
    ) -> SuggestionResult:
        """Suggest dictionary words whose phonetic hash equals that of ``term``.

        ``context`` is accepted for protocol compatibility but not used.
        """
        # Generate phonetic hash for the term
        target_hash = self.hasher.encode(term)

        # Find phonetically similar words by comparing hashes. This walks
        # every word the provider returns and hashes it on each call;
        # maintaining a hash index would avoid the full scan.
        # NOTE: _compute_distance and _compute_confidence are helpers this
        # class is expected to supply; they are not shown on this page.
        suggestions = [
            SuggestionData(
                term=word,
                edit_distance=self._compute_distance(term, word),
                frequency=freq,
                confidence=self._compute_confidence(term, word),
            )
            for word, freq in self.provider.get_all_words()
            if self.hasher.encode(word) == target_hash
        ]
        return self._create_result(suggestions)
CompositeSuggestionStrategy
Combines multiple strategies with unified ranking:
class CompositeSuggestionStrategy(BaseSuggestionStrategy):
    """Combines multiple strategies and merges results."""

    def __init__(
        self,
        strategies: List[SuggestionStrategy],
        ranker: Optional[SuggestionRanker] = None,
        max_suggestions: int = 5,
        deduplicate: bool = True,
    ):
        super().__init__(max_suggestions=max_suggestions)
        self._strategies = strategies
        self._deduplicate = deduplicate
        # Fall back to a default UnifiedRanker when no ranker is supplied
        self._ranker = ranker or UnifiedRanker()
Using Composite Strategy
from myspellchecker.algorithms.suggestion_strategy import CompositeSuggestionStrategy
from myspellchecker.algorithms.ranker import UnifiedRanker

# Merge edit-distance, phonetic and context-aware candidates in one pipeline.
# provider, hasher and ngram_checker are components created elsewhere.
# NOTE(review): the Edit Distance section of this page names the SymSpell
# strategy class SymSpellSuggestionStrategy - confirm which name is exported.
composite = CompositeSuggestionStrategy(
    strategies=[
        SymSpellStrategy(provider),
        PhoneticStrategy(hasher, provider),
        ContextStrategy(ngram_checker),
    ],
    ranker=UnifiedRanker(),
    max_suggestions=10,
    deduplicate=True,
)

# One call returns the merged, ranked suggestions
result = composite.suggest("typo", context)

# The result metadata lists the contributing strategies
print(result.metadata["strategies"])  # ["symspell", "phonetic", "context"]
Context Support
# Check if composite supports context
if composite.supports_context():
    # At least one sub-strategy uses context
    result = composite.suggest(term, context)
else:
    # No sub-strategy uses context, so it can be omitted
    result = composite.suggest(term)
Strategy Types
Morpheme-Level Strategy
Corrects typos inside compound words and reduplications by fixing individual morphemes:
from myspellchecker.algorithms.morpheme_suggestion_strategy import MorphemeSuggestionStrategy

morpheme_strategy = MorphemeSuggestionStrategy(
    compound_resolver=resolver,  # a CompoundResolver
    reduplication_engine=engine,  # a ReduplicationEngine
    symspell=symspell,  # used to correct individual morphemes
    dictionary_check=dict_check,  # callable performing the dictionary lookup
    max_suggestions=3,
)

# Compound with a typo in one morpheme: ကျောင်းသာ ("သာ" should be "သား")
result = morpheme_strategy.suggest("ကျောင်းသာ")
# Suggests: "ကျောင်းသား" (corrected morpheme)

# Incomplete reduplication: ကောင်းကောင်
result = morpheme_strategy.suggest("ကောင်းကောင်")
# Suggests: "ကောင်းကောင်း" (completed reduplication)
The strategy is automatically included in the CompositeSuggestionStrategy pipeline
when either ReduplicationEngine or CompoundResolver is enabled. Its suggestions use
source="morpheme" with confidence 0.80-0.85 and are weighted by
source_weight_morpheme=0.85 in RankerConfig.
Medial Swap Strategy
The medial swap strategy generates candidates by swapping, inserting, or deleting Myanmar medials (ျ↔ြ, ွ↔ှ). It targets the most common error type in Myanmar text, ya-pin/ya-yit confusion, whose corrections SymSpell’s delete-distance model cannot reliably surface as edit-distance-1 candidates.
from myspellchecker.algorithms.medial_swap_strategy import MedialSwapSuggestionStrategy

medial_strategy = MedialSwapSuggestionStrategy(
    dictionary_check=provider.is_valid_word,  # callable: dictionary lookup
    get_frequency=provider.get_word_frequency,  # callable: frequency lookup
    max_suggestions=5,
    max_variants_per_word=8,
    confidence=0.90,
    include_insertions=True,  # add a missing medial (e.g., မာ → မှာ)
    include_deletions=True,  # drop an extra medial
    include_anusvara=True,  # also try inserting/deleting ံ
)

# Swapping the medial: ကျောင်း → ကြောင်း (ya-pin → ya-yit)
result = medial_strategy.suggest("ကျောင်း")
# Suggests: "ကြောင်း" (swapped medial)
Algorithm:
- Scan word for medial characters (U+103B-U+103E)
- Generate swap variants using configured pairs (ျ↔ြ, ွ↔ှ)
- Generate insertion variants (add missing medial after consonant)
- Generate deletion variants (remove extra medial)
- Optionally generate anusvara (ံ) insert/delete variants
- Validate each variant against the dictionary
- Return valid variants as SuggestionData with source="medial_swap"
Rules are loaded from rules/medial_swap_pairs.yaml and can be customized via the rules_path parameter. Performance is O(1) per variant (3-8 variants per word).
The strategy is automatically included in the suggestion pipeline and its candidates use source_weight_medial_swap=1.0 in RankerConfig.
Edit Distance Strategies
class SymSpellSuggestionStrategy(BaseSuggestionStrategy):
    """SymSpell-based O(1) suggestions."""

    def suggest(self, term, context=None):
        """Look ``term`` up in SymSpell; ``context`` is not used."""
        suggestions = self.symspell.lookup(
            term,
            level=self._validation_level,
            max_suggestions=self._max_suggestions,
        )
        return self._create_result(suggestions)
Context-Aware Strategies
class ContextStrategy(BaseSuggestionStrategy):
    """N-gram context-aware suggestions."""

    def supports_context(self) -> bool:
        return True

    def suggest(self, term, context=None):
        """Score candidates for ``term`` against the preceding words.

        Returns an empty result when no context, or no ``prev_words``,
        is supplied.
        """
        if not (context and context.prev_words):
            # Nothing to score against: contribute no suggestions rather
            # than falling back to a non-contextual lookup.
            return self._create_result([])

        # Use context for scoring
        candidates = self._get_candidates(term)
        scored = self._score_with_context(candidates, context)
        return self._create_result(scored)
Phonetic Strategies
class PhoneticStrategy(BaseSuggestionStrategy):
    """Sound-alike suggestions."""

    def suggest(self, term, context=None):
        """Return candidates whose phonetic hash is similar to that of ``term``."""
        phonetic_hash = self.hasher.encode(term)
        candidates = self.find_similar_hashes(phonetic_hash)
        return self._create_result(candidates)
Semantic Strategies
class SemanticStrategy(BaseSuggestionStrategy):
    """ML-based semantic suggestions."""

    def supports_context(self) -> bool:
        return True

    def suggest(self, term, context=None):
        """Predict replacements for ``term`` by masking it in the sentence.

        Returns an empty result when no context, or no sentence, is given.
        """
        if not context or not context.sentence:
            return SuggestionResult([], self.name)

        # Use masked LM to predict. count=1 masks only the first occurrence;
        # an unbounded replace would insert several [MASK] tokens whenever
        # the term appears more than once in the sentence.
        # NOTE(review): context.position could pick the exact occurrence -
        # confirm its unit (word index vs. character offset) before using it.
        masked = context.sentence.replace(term, "[MASK]", 1)
        predictions = self.model.predict(masked)
        return self._create_result(predictions)
Batch Processing
# Process multiple terms
terms = ["typo1", "typo2", "typo3"]
# One context per term, in the same order as `terms`
contexts = [SuggestionContext(prev_words=["prev"]) for _ in terms]

# Batch suggest
results = strategy.suggest_batch(terms, contexts)
for term, result in zip(terms, results):
    print(f"{term}: {result.terms[:3]}")
Integration
With SpellChecker
Suggestion strategies are not passed directly to SpellChecker. Instead, they are
wired internally via the WordValidator, which receives a CompositeSuggestionStrategy
through the DI container and factory system:
from myspellchecker.core.spellchecker import SpellChecker
from myspellchecker.providers.sqlite import SQLiteProvider

# SpellChecker uses suggestion strategies internally via WordValidator.
# The strategy pipeline is assembled by the DI container during construction.
checker = SpellChecker(provider=SQLiteProvider(database_path="my.db"))

# check() returns a Response with errors that include ranked suggestions
result = checker.check("မြန်စာ")
for error in result.errors:
    # Show at most the top three suggestions per error
    print(f"{error.text}: {error.suggestions[:3]}")
To customize which strategies are used, configure the SpellCheckerConfig and
pass it to SpellChecker. The WordValidator internally creates and composes
strategies (SymSpell, Morphology, Compound, Morpheme, Context) based on the
config and available components.
With Algorithm Factory
from myspellchecker.algorithms.factory import AlgorithmFactory

# The factory is bound to a provider and builds algorithm components from it
factory = AlgorithmFactory(provider)

symspell = factory.create_symspell()
semantic_checker = factory.create_semantic_checker()
# The returned components can be used directly for suggestions and context checking
See Also