@click.command()
@click.argument(
    'inputs',
    metavar='FILEPATHS',
    required=True,
    nargs=-1,
)
@click.option(
    '-o',
    '--output-dir',
    metavar='PATH',
    required=True,
    help='Output directory for encoded shards.',
)
@click.option(
    '-s',
    '--size',
    metavar='SIZE',
    required=True,
    help='Max data size of each shard.',
)
@click.option(
    '-f',
    '--format',
    default='shard-{index}.txt',
    help='Shard name format where {index} is replaced by shard index.',
)
@click.option(
    '--shuffle/--no-shuffle',
    default=False,
    help='Shuffle documents before sharding.',
)
@click.option(
    '--log-level',
    default='INFO',
    type=click.Choice(
        ['DEBUG', 'INFO', 'WARNING', 'ERROR'],
        case_sensitive=False,
    ),
    help='Minimum logging level.',
)
@click.option(
    '--rich/--no-rich',
    default=False,
    help='Use rich output formatting.',
)
def cli(
    inputs: tuple[str, ...],
    output_dir: str,
    size: str,
    format: str,  # noqa: A002
    shuffle: bool,
    log_level: str,
    rich: bool,
) -> None:
    """Shard documents in FILEPATHS into equally sized files."""
    # NOTE: click uses the docstring above as the command's --help text, so
    # it is kept to a single user-facing sentence; parameter notes live here.
    #
    # inputs:     one or more file paths; click collects the variadic
    #             FILEPATHS argument (nargs=-1) into a tuple.
    # output_dir: value of -o/--output-dir, passed through to shard().
    # size:       raw -s/--size string; converted by readable_to_bytes()
    #             below (presumably accepts human-readable sizes - confirm
    #             against that helper).
    # format:     shard file name template containing an {index}
    #             placeholder. The name shadows the builtin (hence the noqa)
    #             because click derives it from the --format option.
    # shuffle:    True when --shuffle is given, False otherwise (default).
    # log_level:  one of DEBUG/INFO/WARNING/ERROR, matched case-insensitively
    #             by click.Choice.
    # rich:       True when --rich is given; forwarded to init_logging().

    # Configure logging first so the remaining steps run with it in place.
    init_logging(log_level, rich=rich)

    # Convert the user-supplied size string before handing it to shard().
    size_bytes = readable_to_bytes(size)

    shard(inputs, output_dir, format, size_bytes, shuffle)