Documentation Index
Fetch the complete documentation index at: https://docs.myspellchecker.com/llms.txt
Use this file to discover all available pages before exploring further.
Utility functions used internally by the data pipeline for pre-flight checks, such as verifying that enough disk space is available before building dictionaries.
Overview
from myspellchecker.utils.io_utils import check_disk_space
# Check if enough disk space before building database
check_disk_space("/path/to/output", required_mb=500)
Functions
check_disk_space
Verify that a disk partition has sufficient free space:
from myspellchecker.utils.io_utils import check_disk_space
def check_disk_space(
path: Union[str, Path],
required_mb: int = 500,
) -> None:
"""Check if disk partition has enough free space.
Args:
path: Path to check (directory or file)
required_mb: Minimum required free space in MB (default: 500MB)
Raises:
InsufficientStorageError: If free space is less than required.
"""
Usage
from pathlib import Path
from myspellchecker.utils.io_utils import check_disk_space
from myspellchecker.core.exceptions import InsufficientStorageError
# Basic check
try:
check_disk_space("/tmp/output.db", required_mb=500)
print("Sufficient disk space available")
except InsufficientStorageError as e:
print(f"Not enough space: {e}")
# Check with Path object
output_path = Path("/data/dictionaries/mydict.db")
check_disk_space(output_path, required_mb=1000)
# Check directory
check_disk_space("/var/data", required_mb=2000)
Behavior
Path Resolution
The function handles various path scenarios:
# Absolute path
check_disk_space("/home/user/data/output.db")
# Relative path (converted to absolute)
check_disk_space("./output.db")
# Non-existent path (checks nearest existing parent)
check_disk_space("/data/new_dir/new_subdir/output.db")
# Walks up the parents (/data/new_dir/new_subdir, then /data/new_dir, then /data, then /) until an existing path is found
Error Conditions
from myspellchecker.core.exceptions import InsufficientStorageError
try:
check_disk_space("/tmp", required_mb=1000000) # 1TB
except InsufficientStorageError as e:
print(e)
# "Insufficient disk space at /tmp. Available: 50000.00 MB, Required: 1000000 MB."
Silent Failures
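Some errors are logged as warnings but don’t raise exceptions. In these cases the function returns normally, so a successful return does not guarantee that the free-space check was actually performed: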
# FileNotFoundError - logged as warning
check_disk_space("/nonexistent/mount/point")
# Warning: "Could not check disk space for /nonexistent/mount/point"
# OSError - logged as warning
check_disk_space("/inaccessible/path")
# Warning: "Error checking disk space: ..."
Integration with Pipeline
Pre-Build Check
from myspellchecker.data_pipeline import Pipeline
from myspellchecker.utils.io_utils import check_disk_space
class Pipeline:
def run(self, input_path: Path, output_path: Path):
# Check disk space before starting
estimated_size = self._estimate_output_size(input_path)
check_disk_space(output_path, required_mb=estimated_size * 1.5)
# Proceed with build
self._run_pipeline()
With Reporter
from myspellchecker.utils.io_utils import check_disk_space
from myspellchecker.core.exceptions import InsufficientStorageError
def build_dictionary(input_path, output_path, reporter):
try:
check_disk_space(output_path, required_mb=500)
reporter.report_info("Disk space check passed")
except InsufficientStorageError as e:
reporter.report_error(str(e))
raise
InsufficientStorageError
Custom exception for disk space issues:
from myspellchecker.core.exceptions import InsufficientStorageError
class InsufficientStorageError(MyanmarSpellcheckError):
"""Raised when disk space is insufficient for an operation."""
pass
Handling the Exception
from myspellchecker.core.exceptions import InsufficientStorageError
try:
check_disk_space(path, required_mb=500)
except InsufficientStorageError:
# Option 1: Clean up and retry
cleanup_temp_files()
check_disk_space(path, required_mb=500)
# Option 2: Use alternative location
check_disk_space("/tmp", required_mb=500)
# Option 3: Inform user
print("Please free up disk space and try again")
The function uses shutil.disk_usage() internally, which reports total, used, and free space in bytes:
import shutil
# Get disk usage details
total, used, free = shutil.disk_usage("/path")
print(f"Total: {total / (1024**3):.2f} GB")
print(f"Used: {used / (1024**3):.2f} GB")
print(f"Free: {free / (1024**3):.2f} GB")
Best Practices
1. Check Before Large Operations
def build_large_dictionary(corpus_path, output_path):
# Estimate required space
corpus_size = corpus_path.stat().st_size
estimated_db_size = corpus_size * 3 # Database typically 3x corpus
# Add safety margin
required_mb = (estimated_db_size / (1024 * 1024)) * 1.5
check_disk_space(output_path, required_mb=int(required_mb))
2. Check Multiple Locations
def setup_workspace(output_dir, temp_dir):
# Check both output and temp locations
check_disk_space(output_dir, required_mb=500)
check_disk_space(temp_dir, required_mb=200)
3. Graceful Degradation
def safe_build(input_path, output_path):
try:
check_disk_space(output_path, required_mb=500)
except InsufficientStorageError:
logger.warning("Low disk space, using minimal build options")
# Use smaller batch sizes, skip optional features
return build_minimal(input_path, output_path)
return build_full(input_path, output_path)
See Also