Building CLI Tools with Python
Command-line tools are the unsung heroes of data work. Well-designed CLIs make automation reliable and team collaboration smoother.
Why CLI?
- Easy to script and automate
- Works in any environment (local, CI/CD, containers)
- Clear interface documentation
- Composable with other tools
Library Choices
Click (Recommended)
import click
# Root command group; subcommands register themselves with @cli.command().
@click.group()
def cli():
    """Data processing toolkit."""
@cli.command()
@click.argument(
    'input_file',
    type=click.Path(exists=True),
)
@click.option(
    '--output', '-o',
    default='output.csv',
)
@click.option('--verbose', '-v', is_flag=True)
def process(input_file, output, verbose):
    """Process input data file."""
    # Announce the input only when -v/--verbose was passed.
    if verbose:
        click.echo(f"Processing {input_file}...")

    # do_processing is defined outside this snippet; its result is written
    # to the --output path through its to_csv() method.
    do_processing(input_file).to_csv(output)
    click.echo(f"Saved to {output}")
# Invoke the command group only when run as a script, not when imported.
if __name__ == '__main__':
    cli()
Typer (Modern Alternative)
import typer
from pathlib import Path
# Typer application object; commands are registered on it with @app.command().
app = typer.Typer()
@app.command()
def process(
    input_file: Path,
    output: Path = Path("output.csv"),
    verbose: bool = False,
):
    """Process input data file."""
    # Announce the input only when verbose output was requested.
    if verbose:
        typer.echo(f"Processing {input_file}...")

    # do_processing is defined outside this snippet; its result is written
    # to the output path through its to_csv() method.
    do_processing(input_file).to_csv(output)
    typer.echo(f"Saved to {output}")
# Start the Typer app only when run as a script, not when imported.
if __name__ == "__main__":
    app()
Best Practices
1. Progress Reporting
from tqdm import tqdm
# Wrap the iterable so a progress bar labelled "Processing" advances once
# per item handled.
progress = tqdm(items, desc="Processing")
for item in progress:
    process_item(item)
2. Proper Exit Codes
import sys
def main():
    """Run the process and exit with a status code reflecting the outcome.

    Exits with 0 on success, 1 when a ValidationError is raised, and 2 for
    any other exception. Error messages are echoed with ``err=True``.
    """
    try:
        run_process()
    except ValidationError as exc:
        click.echo(f"Error: {exc}", err=True)
        sys.exit(1)  # User error
    except Exception as exc:
        # Top-level boundary: report anything unexpected and exit non-zero
        # instead of letting a traceback escape.
        click.echo(f"Internal error: {exc}", err=True)
        sys.exit(2)  # System error
    else:
        sys.exit(0)  # Success
3. Configuration Files
import yaml
@click.command()
@click.option('--config', type=click.Path(exists=True))
def run(config):
    """Run with configuration file."""
    if config:
        # Read as UTF-8 explicitly so parsing does not depend on the
        # platform's default locale encoding.
        with open(config, encoding='utf-8') as f:
            # safe_load constructs only plain Python objects.
            # NOTE(review): it returns None for an empty document; confirm
            # that execute() tolerates a None configuration.
            cfg = yaml.safe_load(f)
    else:
        # No --config given: use the defaults supplied by default_config().
        cfg = default_config()
    execute(cfg)
4. Logging Setup
import logging
# NOTE(review): no @click.command() decorator is visible on this snippet;
# confirm it is applied wherever this function is registered as a command.
@click.option(
    '--log-level',
    default='INFO',
    # Restrict to the standard level names; matching is case-insensitive so
    # `--log-level debug` is accepted and typos are rejected by Click.
    type=click.Choice(
        ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        case_sensitive=False,
    ),
)
def main(log_level):
    """Configure logging at the level named by --log-level."""
    # Upper-case before the lookup: a lowercase name such as 'debug' would
    # otherwise resolve to the logging.debug function rather than the
    # numeric logging.DEBUG constant.
    logging.basicConfig(
        level=getattr(logging, log_level.upper()),
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
Distribution
# pyproject.toml
[project.scripts]
# Point the console script at the Click group (`cli`) so that subcommands
# such as `mytool process` resolve.
mytool = "mypackage.cli:cli"
# Install the package from the current directory, then run the console script
pip install .
mytool process data.csv --output result.csv
Good CLI design makes the difference between a tool that gets used and one that gets abandoned.