Source code for metatrain.utils.data.writers.xyz

from typing import Dict, List

import ase
import ase.io
import metatensor.torch
import torch
from metatensor.torch import Labels, TensorMap
from metatomic.torch import ModelCapabilities, System

from ...external_naming import to_external_name



[docs]
def write_xyz(
    filename: str,
    systems: List[System],
    capabilities: ModelCapabilities,
    predictions: Dict[str, TensorMap],
) -> None:
    """An ase-based xyz file writer. Writes the systems and predictions to an xyz file.

    According to ASE practice, arrays which have a dimension corresponding
    to each atom are saved inside atoms.arrays, while any other arrays are
    saved inside atoms.info.

    Gradients of a target are stored under the name returned by
    ``to_external_name``. If that name contains ``"forces"``, the negated
    gradient is stored in atoms.arrays. If it contains ``"virial"``, the negated
    gradient is stored in atoms.info, together with a ``"stress"`` entry equal
    to the gradient divided by the cell volume. Any other gradient is stored
    unchanged in atoms.info.

    :param filename: name of the file to save to.
    :param systems: structures to be written to the file.
    :param capabilities: capabilities of the model.
    :param predictions: prediction values to be written to the file.
    :raises ValueError: if a target of a system does not consist of exactly one
        block, or if a virial/stress has to be written for a system whose cell
        is entirely zero or has zero volume.
    """

    # we first split the predictions by structure
    predictions_by_structure: List[Dict[str, TensorMap]] = [{} for _ in systems]
    split_labels = [
        Labels(names=["system"], values=torch.tensor([[i_system]]))
        for i_system in range(len(systems))
    ]
    for target_name, target_tensor_map in predictions.items():
        # split this target by structure
        target_tensor_map = target_tensor_map.to("cpu")
        split_target = metatensor.torch.split(
            target_tensor_map, "samples", split_labels
        )
        for i_system, system_target in enumerate(split_target):
            # add the split target to the dict corresponding to the structure
            predictions_by_structure[i_system][target_name] = system_target

    frames = []
    for system, system_predictions in zip(systems, predictions_by_structure):
        # Bring the cell to the CPU once: it is needed for the periodicity check,
        # the volume and the final `ase.Atoms` object. An all-zero cell is
        # treated as "non-periodic" throughout this function.
        cell = system.cell.detach().cpu()
        is_periodic = bool(torch.any(cell != 0))

        info = {}
        arrays = {}
        for target_name, target_map in system_predictions.items():
            if len(target_map.keys) != 1:
                raise ValueError(
                    "Only single-block `TensorMap`s can be "
                    "written to xyz files for the moment."
                )
            block = target_map.block()
            if "atom" in block.samples.names:
                # save inside arrays
                values = block.values.detach().cpu().numpy()
                # reshaping here is necessary because `arrays` only accepts 2D arrays
                arrays[target_name] = values.reshape(values.shape[0], -1)
            else:
                # save inside info
                if block.values.numel() == 1:
                    info[target_name] = block.values.item()
                else:
                    # squeeze the sample dimension, which corresponds to the system
                    info[target_name] = block.values.detach().cpu().numpy().squeeze(0)

            for gradient_name, gradient_block in block.gradients():
                # here, we assume that gradients are always an array, and never a scalar
                internal_name = f"{target_name}_{gradient_name}_gradients"
                external_name = to_external_name(internal_name, capabilities.outputs)

                # squeeze the property dimension
                gradient_values = (
                    gradient_block.values.detach().cpu().squeeze(-1).numpy()
                )

                if "forces" in external_name:
                    # forces are the negative of the gradient
                    arrays[external_name] = -gradient_values
                elif "virial" in external_name:
                    # in this case, we write both the virial and the stress
                    external_name_virial = external_name
                    external_name_stress = external_name.replace("virial", "stress")
                    if not is_periodic:
                        raise ValueError(
                            "stresses cannot be written for non-periodic systems."
                        )
                    # The determinant is signed (negative for a left-handed cell),
                    # while a volume is not: take the absolute value so that the
                    # sign of the stress does not depend on the cell handedness.
                    cell_volume = abs(torch.det(cell).item())
                    if cell_volume == 0:
                        raise ValueError(
                            "stresses cannot be written for systems with zero volume."
                        )
                    info[external_name_virial] = -gradient_values
                    info[external_name_stress] = gradient_values / cell_volume
                else:
                    info[external_name] = gradient_values

        # Predictions were moved to the CPU above, but the systems may still live
        # on another device; ASE needs host data, so convert explicitly.
        atoms = ase.Atoms(
            symbols=system.types.tolist(),
            positions=system.positions.detach().cpu().numpy(),
            info=info,
        )

        # assign cell and pbcs
        if is_periodic:
            atoms.pbc = True
            atoms.cell = cell.numpy()

        # assign arrays
        for array_name, array in arrays.items():
            atoms.arrays[array_name] = array

        frames.append(atoms)

    ase.io.write(filename, frames)