llm.environment

llm/environment.py

Utilities for collecting information about the environment.

Tip

This module is executable, so you can easily check which resources your scripts will see as available. This is useful when you need to debug which software versions are being used or which hardware is visible to PyTorch.

python -m llm.environment

Environment ¶

Bases: NamedTuple

Named tuple representing collected environment information.

collect_pip_version ¶

collect_pip_version() -> str

Collect the pip version.

Source code in llm/environment.py

def collect_pip_version() -> str:
    """Collect the pip version.

    pip is invoked as ``<current interpreter> -m pip`` instead of as a bare
    ``pip`` executable, so the reported version belongs to the running
    Python rather than to whichever ``pip`` is first on ``PATH``.

    Returns:
        The second space-separated token of the ``pip --version`` output,
        i.e. the version number in output shaped like ``pip X.Y.Z from ...``.

    Raises:
        subprocess.CalledProcessError: If the pip command exits with a
            non-zero status.
    """
    # sys.executable can be empty when Python cannot determine its own path;
    # in that case fall back to looking up `pip` on PATH.
    pip_cmd = [sys.executable, '-m', 'pip'] if sys.executable else ['pip']
    output = subprocess.check_output([*pip_cmd, '--version']).decode('utf-8')
    return output.split(' ')[1]

collect_pip_packages ¶

collect_pip_packages() -> list[str]

Collect a list of relevant pip packages.

Source code in llm/environment.py

def collect_pip_packages() -> list[str]:
    """Collect a list of relevant pip packages.

    Runs ``pip freeze`` through the current interpreter (``python -m pip``)
    so the listing reflects the packages the running Python sees, not those
    of whichever ``pip`` is first on ``PATH``.

    A line of the freeze output is kept when it contains any of the names
    below as a substring, so a line for e.g. ``torchvision`` is also kept
    because it contains ``torch``.

    Returns:
        The matching ``pip freeze`` lines, stripped of surrounding
        whitespace and sorted.

    Raises:
        subprocess.CalledProcessError: If the pip command exits with a
            non-zero status.
    """
    # sys.executable can be empty when Python cannot determine its own path;
    # in that case fall back to looking up `pip` on PATH.
    pip_cmd = [sys.executable, '-m', 'pip'] if sys.executable else ['pip']
    output = subprocess.check_output([*pip_cmd, 'freeze']).decode('utf-8')
    names = (
        'torch',
        'numpy',
        'mypy',
        'colossalai',
        'h5py',
        'tensorboard',
        'tokenizers',
        'transformers',
    )
    # splitlines() copes with both '\n' and '\r\n' line endings.
    packages = [
        line.strip()
        for line in output.splitlines()
        if any(name in line for name in names)
    ]
    return sorted(packages)

collect_environment ¶

collect_environment() -> Environment

Collects information on the hardware and software environment.

Source code in llm/environment.py

def collect_environment() -> Environment:
    """Collects information on the hardware and software environment.

    Gathers Python, pip, and PyTorch version details, CPU and memory
    figures from `psutil`, and CUDA/NVIDIA details via the `collect_env`
    helpers, and packs them into an `Environment` named tuple.

    Returns:
        An `Environment` populated with the collected values.
    """
    run_lambda = collect_env.run

    # sys.maxsize is the largest signed native integer, so its bit length
    # plus the sign bit gives the pointer width (e.g. 63 + 1 = 64).
    bit_count = sys.maxsize.bit_length() + 1
    # sys.version may span several lines; flatten it for single-line output.
    sys_version = sys.version.replace('\n', ' ')

    pip_version = collect_pip_version()
    pip_packages = collect_pip_packages()
    version_str = torch.__version__
    debug_mode_str = torch.version.debug

    pcores = psutil.cpu_count(logical=False)
    lcores = psutil.cpu_count(logical=True)
    cpu_info = f'{platform.processor()} ({pcores} cores / {lcores} logical)'
    # Report the machine's total memory (in units of 1e9 bytes); the
    # `available` figure fluctuates with load and is not the total that the
    # `total_ram_gb` field name promises.
    total_ram = round(psutil.virtual_memory().total / 1e9, 2)

    cuda_available_str = torch.cuda.is_available()
    cuda_version_str = torch.version.cuda

    return Environment(
        os=collect_env.get_os(run_lambda),
        python_version=f'{sys_version} ({bit_count}-bit runtime)',
        python_platform=collect_env.get_python_platform(),
        pip_version=pip_version,
        pip_packages='\n'.join(pip_packages),
        torch_version=version_str,
        torch_is_debug=debug_mode_str,
        cpu_info=cpu_info,
        total_ram_gb=total_ram,
        cuda_is_available=cuda_available_str,
        cuda_compiled_version=cuda_version_str,
        cuda_runtime_version=collect_env.get_running_cuda_version(run_lambda),
        cuda_module_loading=collect_env.get_cuda_module_loading_config(),
        nvidia_gpu_models=collect_env.get_gpu_info(run_lambda),
        nvidia_driver=collect_env.get_nvidia_driver_version(run_lambda),
        cudnn_version=collect_env.get_cudnn_version(run_lambda),
    )

log_environment ¶

log_environment(
    level: int = logging.INFO,
    ranks: Iterable[int] | None = (0,),
) -> None

Log the hardware and software environment.

Parameters:

level (int, default: INFO ) –

Logging level.
ranks (Iterable[int] | None, default: (0,) ) –

Ranks to log the environment on. If None, logs on all ranks.

Source code in llm/environment.py

def log_environment(
    level: int = logging.INFO,
    ranks: Iterable[int] | None = (0,),
) -> None:
    """Collect the runtime environment and write it to the module logger.

    The collected environment is rendered with `ENVIRONMENT_FORMAT` and
    emitted as a single log record prefixed with `Runtime environment:`.

    Args:
        level: Logging level to emit the record at.
        ranks: Ranks to log the environment on, passed to the logger through
            the record's `ranks` extra field. If `None`, logs on all ranks.
    """
    fields = collect_environment()._asdict()
    env_str = ENVIRONMENT_FORMAT.format(**fields)
    message = f'Runtime environment:\n{env_str}'
    logger.log(level, message, extra={'ranks': ranks})