Skip to content

mm #

Compute differentiable ensemble averages using OpenMM and SMEE.

Classes:

  • GenerateCoordsConfig

    Configure how coordinates should be generated for a system using PACKMOL.

  • MinimizationConfig

    Configure how a system should be energy minimized.

  • NotEnoughSamplesError

    An error raised when an ensemble average is attempted with too few samples.

  • TensorReporter

    A reporter which stores coords, box vectors, reduced potentials and kinetic

Functions:

GenerateCoordsConfig pydantic-model #

Bases: BaseModel

Configure how coordinates should be generated for a system using PACKMOL.

MinimizationConfig pydantic-model #

Bases: BaseModel

Configure how a system should be energy minimized.

NotEnoughSamplesError #

Bases: ValueError

An error raised when an ensemble average is attempted with too few samples.

TensorReporter #

TensorReporter(
    output_file: BinaryIO,
    report_interval: int,
    beta: Quantity,
    pressure: Quantity | None,
)

A reporter which stores coords, box vectors, reduced potentials and kinetic energy using msgpack.

report_interval: The interval (in steps) at which to write frames.
beta: The inverse temperature the simulation is being run at.
pressure: The pressure the simulation is being run at, or None if NVT /
    vacuum.
Source code in smee/mm/_reporters.py
def __init__(
    self,
    output_file: typing.BinaryIO,
    report_interval: int,
    beta: openmm.unit.Quantity,
    pressure: openmm.unit.Quantity | None,
):
    """

    Args:
        output_file: The file to write the frames to.
        report_interval: The interval (in steps) at which to write frames.
        beta: The inverse temperature the simulation is being run at.
        pressure: The pressure the simulation is being run at, or None if NVT /
            vacuum.
    """
    self._output_file = output_file
    self._report_interval = report_interval

    self._beta = beta
    self._pressure = (
        None if pressure is None else pressure * openmm.unit.AVOGADRO_CONSTANT_NA
    )

generate_system_coords #

generate_system_coords(
    system: TensorSystem,
    force_field: TensorForceField | None,
    config: Optional[GenerateCoordsConfig] = None,
) -> tuple[Quantity, Quantity]

Generate coordinates for a system of molecules using PACKMOL.

Parameters:

  • system (TensorSystem) –

    The system to generate coordinates for.

  • force_field (TensorForceField | None) –

    The force field that describes the geometry of any virtual sites.

  • config (Optional[GenerateCoordsConfig], default: None ) –

    Configuration of how to generate the system coordinates.

Returns:

  • tuple[Quantity, Quantity]

    The coordinates with shape=(n_atoms, 3) and box vectors with shape=(3, 3)

Source code in smee/mm/_mm.py
def generate_system_coords(
    system: smee.TensorSystem,
    force_field: smee.TensorForceField | None,
    config: typing.Optional["smee.mm.GenerateCoordsConfig"] = None,
) -> tuple[openmm.unit.Quantity, openmm.unit.Quantity]:
    """Generate coordinates for a system of molecules using PACKMOL.

    Args:
        system: The system to generate coordinates for.
        force_field: The force field that describes the geometry of any virtual sites.
        config: Configuration of how to generate the system coordinates.

    Raises:
        * PACKMOLRuntimeError

    Returns:
        The coordinates with ``shape=(n_atoms, 3)`` and box vectors with
        ``shape=(3, 3)``
    """

    system = system.to("cpu")
    force_field = None if force_field is None else force_field.to("cpu")

    config = config if config is not None else smee.mm.GenerateCoordsConfig()

    box_size = _approximate_box_size(system, config)

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dir = pathlib.Path(tmp_dir)

        for i, topology in enumerate(system.topologies):
            xyz = _topology_to_xyz(topology, force_field)
            (tmp_dir / f"{i}.xyz").write_text(xyz)

        input_file = tmp_dir / "input.txt"
        input_file.write_text(
            _generate_packmol_input(system.n_copies, box_size, config)
        )

        with input_file.open("r") as file:
            result = subprocess.run(
                "packmol", stdin=file, capture_output=True, text=True, cwd=tmp_dir
            )

        if result.returncode != 0 or not result.stdout.find("Success!") > 0:
            raise PACKMOLRuntimeError(result.stdout)

        output_lines = (tmp_dir / "output.xyz").read_text().splitlines()

    coordinates = (
        numpy.array(
            [
                [float(coordinate) for coordinate in coordinate_line.split()[1:]]
                for coordinate_line in output_lines[2:]
                if len(coordinate_line) > 0
            ]
        )
        * openmm.unit.angstrom
    )

    box_vectors = numpy.eye(3) * (box_size + config.padding)
    return coordinates, box_vectors

simulate #

simulate(
    system: TensorSystem | TensorTopology,
    force_field: TensorForceField,
    coords: Quantity,
    box_vectors: Quantity | None,
    equilibrate_configs: list[
        Union[MinimizationConfig, SimulationConfig]
    ],
    production_config: SimulationConfig,
    production_reporters: list[Any] | None = None,
    apply_hmr: bool = False,
) -> State

Simulate a SMEE system of molecules or topology.

Parameters:

  • system (TensorSystem | TensorTopology) –

    The system / topology to simulate.

  • force_field (TensorForceField) –

    The force field to simulate with.

  • coords (Quantity) –

    The coordinates [Å] to use for the simulation. This should be a unit wrapped numpy array with shape=(n_atoms, 3).

  • box_vectors (Quantity | None) –

    The box vectors [Å] to use for the simulation if periodic. This should be a unit wrapped numpy array with shape=(3, 3).

  • equilibrate_configs (list[Union[MinimizationConfig, SimulationConfig]]) –

    A list of configurations defining the steps to run for equilibration. No data will be stored from these simulations.

  • production_config (SimulationConfig) –

    The configuration defining the production simulation to run.

  • production_reporters (list[Any] | None, default: None ) –

    A list of additional OpenMM reporters to use for the production simulation.

  • apply_hmr (bool, default: False ) –

    Whether to apply Hydrogen Mass Repartitioning to the system prior to simulation.

Source code in smee/mm/_mm.py
def simulate(
    system: smee.TensorSystem | smee.TensorTopology,
    force_field: smee.TensorForceField,
    coords: openmm.unit.Quantity,
    box_vectors: openmm.unit.Quantity | None,
    equilibrate_configs: list[
        typing.Union["smee.mm.MinimizationConfig", "smee.mm.SimulationConfig"]
    ],
    production_config: "smee.mm.SimulationConfig",
    production_reporters: list[typing.Any] | None = None,
    apply_hmr: bool = False,
) -> openmm.State:
    """Simulate a SMEE system of molecules or topology.

    Args:
        system: The system / topology to simulate.
        force_field: The force field to simulate with.
        coords: The coordinates [Å] to use for the simulation. This should be
            a unit wrapped numpy array with ``shape=(n_atoms, 3)``.
        box_vectors: The box vectors [Å] to use for the simulation if periodic. This
            should be a unit wrapped numpy array with ``shape=(3, 3)``.
        equilibrate_configs: A list of configurations defining the steps to run for
            equilibration. No data will be stored from these simulations.
        production_config: The configuration defining the production simulation to run.
        production_reporters: A list of additional OpenMM reporters to use for the
            production simulation.
        apply_hmr: Whether to apply Hydrogen Mass Repartitioning to the system prior
            to simulation.
    """

    assert isinstance(coords.value_in_unit(openmm.unit.angstrom), numpy.ndarray)
    assert isinstance(box_vectors.value_in_unit(openmm.unit.angstrom), numpy.ndarray)

    force_field = force_field.to("cpu")

    system: smee.TensorSystem = (
        system
        if isinstance(system, smee.TensorSystem)
        else smee.TensorSystem([system], [1], False)
    ).to("cpu")

    requires_pbc = any(
        config.pressure is not None
        for config in equilibrate_configs + [production_config]
        if isinstance(config, smee.mm.SimulationConfig)
    )

    if not system.is_periodic and requires_pbc:
        raise ValueError("pressure cannot be specified for a non-periodic system")

    platform = _get_platform(system.is_periodic)

    omm_state = coords, box_vectors

    omm_system = smee.converters.convert_to_openmm_system(force_field, system)
    omm_topology = smee.converters.convert_to_openmm_topology(system)

    if apply_hmr:
        _apply_hmr(omm_system, system)

    for i, config in enumerate(equilibrate_configs):
        _LOGGER.info(f"running equilibration step {i + 1} / {len(equilibrate_configs)}")

        if isinstance(config, smee.mm.MinimizationConfig):
            omm_state = _energy_minimize(omm_system, omm_state, platform, config)

        elif isinstance(config, smee.mm.SimulationConfig):
            omm_state = _run_simulation(
                omm_system, omm_topology, omm_state, platform, config, None
            )
        else:
            raise NotImplementedError

        _LOGGER.info(_get_state_log(omm_state))

    _LOGGER.info("running production simulation")
    omm_state = _run_simulation(
        omm_system,
        omm_topology,
        omm_state,
        platform,
        production_config,
        production_reporters,
    )
    _LOGGER.info(_get_state_log(omm_state))

    return omm_state

compute_ensemble_averages #

compute_ensemble_averages(
    system: TensorSystem,
    force_field: TensorForceField,
    frames_path: Path,
    temperature: Quantity,
    pressure: Quantity | None,
) -> tuple[dict[str, Tensor], dict[str, Tensor]]

Compute ensemble average of the potential energy, volume, density, and enthalpy (if running NPT) over an MD trajectory.

Parameters:

  • system (TensorSystem) –

    The system to simulate.

  • force_field (TensorForceField) –

    The force field to use.

  • frames_path (Path) –

    The path to the trajectory to compute the average over.

  • temperature (Quantity) –

    The temperature that the trajectory was simulated at.

  • pressure (Quantity | None) –

    The pressure that the trajectory was simulated at.

Returns:

  • tuple[dict[str, Tensor], dict[str, Tensor]]

    A dictionary containing the ensemble averages of the potential energy [kcal/mol], volume [Å^3], density [g/mL], and enthalpy [kcal/mol], and a dictionary containing their standard deviations.

Source code in smee/mm/_ops.py
def compute_ensemble_averages(
    system: smee.TensorSystem,
    force_field: smee.TensorForceField,
    frames_path: pathlib.Path,
    temperature: openmm.unit.Quantity,
    pressure: openmm.unit.Quantity | None,
) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
    """Compute ensemble average of the potential energy, volume, density,
    and enthalpy (if running NPT) over an MD trajectory.

    Args:
        system: The system to simulate.
        force_field: The force field to use.
        frames_path: The path to the trajectory to compute the average over.
        temperature: The temperature that the trajectory was simulated at.
        pressure: The pressure that the trajectory was simulated at.

    Returns:
        A dictionary containing the ensemble averages of the potential energy
        [kcal/mol], volume [Å^3], density [g/mL], and enthalpy [kcal/mol],
        and a dictionary containing their standard deviations.
    """
    tensors, parameter_lookup, attribute_lookup, has_v_sites = _pack_force_field(
        force_field
    )

    beta = 1.0 / (openmm.unit.MOLAR_GAS_CONSTANT_R * temperature)
    beta = beta.value_in_unit(openmm.unit.kilocalorie_per_mole**-1)

    if pressure is not None:
        pressure = (pressure * openmm.unit.AVOGADRO_CONSTANT_NA).value_in_unit(
            openmm.unit.kilocalorie_per_mole / openmm.unit.angstrom**3
        )

    kwargs: _EnsembleAverageKwargs = {
        "force_field": force_field,
        "parameter_lookup": parameter_lookup,
        "attribute_lookup": attribute_lookup,
        "has_v_sites": has_v_sites,
        "system": system,
        "frames_path": frames_path,
        "beta": beta,
        "pressure": pressure,
    }

    *avg_outputs, columns = _EnsembleAverageOp.apply(kwargs, *tensors)

    avg_values = avg_outputs[: len(avg_outputs) // 2]
    avg_std = avg_outputs[len(avg_outputs) // 2 :]

    return (
        {column: avg for avg, column in zip(avg_values, columns, strict=True)},
        {column: avg for avg, column in zip(avg_std, columns, strict=True)},
    )

reweight_ensemble_averages #

reweight_ensemble_averages(
    system: TensorSystem,
    force_field: TensorForceField,
    frames_path: Path,
    temperature: Quantity,
    pressure: Quantity | None,
    min_samples: int = 50,
) -> dict[str, Tensor]

Compute the ensemble average of the potential energy, volume, density, and enthalpy (if running NPT) by re-weighting an existing MD trajectory.

Parameters:

  • system (TensorSystem) –

    The system that was simulated.

  • force_field (TensorForceField) –

    The new force field to use.

  • frames_path (Path) –

    The path to the trajectory to compute the average over.

  • temperature (Quantity) –

    The temperature that the trajectory was simulated at.

  • pressure (Quantity | None) –

    The pressure that the trajectory was simulated at.

  • min_samples (int, default: 50 ) –

    The minimum number of samples required to compute the average.

Raises:

Returns:

  • dict[str, Tensor]

    A dictionary containing the ensemble averages of the potential energy [kcal/mol], volume [Å^3], density [g/mL], and enthalpy [kcal/mol].

Source code in smee/mm/_ops.py
def reweight_ensemble_averages(
    system: smee.TensorSystem,
    force_field: smee.TensorForceField,
    frames_path: pathlib.Path,
    temperature: openmm.unit.Quantity,
    pressure: openmm.unit.Quantity | None,
    min_samples: int = 50,
) -> dict[str, torch.Tensor]:
    """Compute the ensemble average of the potential energy, volume, density,
    and enthalpy (if running NPT) by re-weighting an existing MD trajectory.

    Args:
        system: The system that was simulated.
        force_field: The new force field to use.
        frames_path: The path to the trajectory to compute the average over.
        temperature: The temperature that the trajectory was simulated at.
        pressure: The pressure that the trajectory was simulated at.
        min_samples: The minimum number of samples required to compute the average.

    Raises:
        NotEnoughSamplesError: If the number of effective samples is less than
            ``min_samples``.

    Returns:
        A dictionary containing the ensemble averages of the potential energy
        [kcal/mol], volume [Å^3], density [g/mL], and enthalpy [kcal/mol].
    """
    tensors, parameter_lookup, attribute_lookup, has_v_sites = _pack_force_field(
        force_field
    )

    beta = 1.0 / (openmm.unit.MOLAR_GAS_CONSTANT_R * temperature)
    beta = beta.value_in_unit(openmm.unit.kilocalorie_per_mole**-1)

    if pressure is not None:
        pressure = (pressure * openmm.unit.AVOGADRO_CONSTANT_NA).value_in_unit(
            openmm.unit.kilocalorie_per_mole / openmm.unit.angstrom**3
        )

    kwargs: _ReweightAverageKwargs = {
        "force_field": force_field,
        "parameter_lookup": parameter_lookup,
        "attribute_lookup": attribute_lookup,
        "has_v_sites": has_v_sites,
        "system": system,
        "frames_path": frames_path,
        "beta": beta,
        "pressure": pressure,
        "min_samples": min_samples,
    }

    *avg_outputs, columns = _ReweightAverageOp.apply(kwargs, *tensors)
    return {column: avg for avg, column in zip(avg_outputs, columns, strict=True)}

tensor_reporter #

tensor_reporter(
    output_path: PathLike,
    report_interval: int,
    beta: Quantity,
    pressure: Quantity | None,
) -> TensorReporter

Create a TensorReporter capable of writing frames to a file.

Parameters:

  • output_path (PathLike) –

    The path to write the frames to.

  • report_interval (int) –

    The interval (in steps) at which to write frames.

  • beta (Quantity) –

    The inverse temperature the simulation is being run at.

  • pressure (Quantity | None) –

    The pressure the simulation is being run at, or None if NVT / vacuum.

Source code in smee/mm/_reporters.py
@contextlib.contextmanager
def tensor_reporter(
    output_path: os.PathLike,
    report_interval: int,
    beta: openmm.unit.Quantity,
    pressure: openmm.unit.Quantity | None,
) -> TensorReporter:
    """Create a ``TensorReporter`` capable of writing frames to a file.

    Args:
        output_path: The path to write the frames to.
        report_interval: The interval (in steps) at which to write frames.
        beta: The inverse temperature the simulation is being run at.
        pressure: The pressure the simulation is being run at, or ``None`` if NVT /
            vacuum.
    """
    with open(output_path, "wb") as output_file:
        reporter = TensorReporter(output_file, report_interval, beta, pressure)
        yield reporter

unpack_frames #

unpack_frames(
    file: BinaryIO,
) -> Generator[tuple[Tensor, Tensor, float], None, None]

Unpack frames saved by a TensorReporter.

Source code in smee/mm/_reporters.py
def unpack_frames(
    file: typing.BinaryIO,
) -> typing.Generator[tuple[torch.Tensor, torch.Tensor, float], None, None]:
    """Unpack frames saved by a ``TensorReporter``."""

    unpacker = msgpack.Unpacker(file, object_hook=_decoder)

    for frame in unpacker:
        yield frame