Source code for petab_sciml.standard.array_data

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, Iterable

import numpy as np
from mkstd import Hdf5Standard
from mkstd.types.array import get_array_type
from numpy.typing import ArrayLike
from pydantic import BaseModel, field_validator
from ruamel.yaml import YAML

from petab_sciml.standard.nn_model import NNModelStandard

if TYPE_CHECKING:
    import torch


# Public API of this module: the names exported by a star-import.
__all__ = [
    "Metadata",
    "ArrayData",
    "ArrayDataStandard",
    "METADATA",
    "DATA",
    "CONDITION_IDS",
    "INPUTS",
    "PARAMETERS",
    "ALL_CONDITION_IDS",
    "extract_torch_parameters",
    # Public sibling of `extract_torch_parameters`; previously not exported.
    "extract_nn_yaml_parameters",
    "add_array_files_to_yaml",
]


# String keys shared by the array data structures in this module.

# Key of the metadata entry; matches the `ArrayData.metadata` field name.
METADATA = "metadata"
# NOTE(review): not referenced in the code visible here — presumably a key of
# the array file layout; confirm against the format specification.
DATA = "data"
# NOTE(review): not referenced in the code visible here — presumably the key
# under which condition IDs are stored; confirm against the format
# specification.
CONDITION_IDS = "conditionIds"
# Name of the `ArrayData.inputs` field (the target of its field validator).
INPUTS = "inputs"
# Key of the parameters entry; matches the `ArrayData.parameters` field name.
PARAMETERS = "parameters"
# Condition-IDs key indicating that an input array applies to all conditions.
ALL_CONDITION_IDS = "0"


# Array type obtained from mkstd; used to annotate the array-valued fields of
# `ArrayData`.
Array = get_array_type()


class Metadata(BaseModel):
    """Metadata for array(s)."""

    pytorch_format: bool
    """Flag stating whether the arrays follow the default PyTorch format.

    PyTorch uses row-major arrays and, for instance, the weight matrix of its
    "Linear layer" is `out_features x in_features`.

    - True: the arrays can be used in PyTorch directly.
    - False: some array operations have to be applied first.
    """
class ArrayData(BaseModel):
    """Multiple arrays.

    For example, data for different inputs for different conditions,
    or values for different parameters of different layers.
    """

    metadata: Metadata
    """Additional metadata for the arrays."""

    inputs: dict[str, dict[str, Array]] = {}
    """Input data arrays.

    Outer keys are input IDs. Inner dict keys are semicolon-delimited lists of
    condition IDs, and values are the corresponding input array data for those
    conditions.
    """

    parameters: dict[str, dict[str, dict[str, Array]]] = {}
    """Parameter value arrays.

    Outer dict keys are NN model IDs. Inner dict keys are layer IDs.
    Inner inner dict keys are layer-specific parameter IDs, and values are the
    corresponding array data.
    """

    @field_validator(INPUTS, mode="after")
    @classmethod
    def validate_condition_ids(
        cls, inputs: dict[str, dict[str, Array]]
    ) -> dict[str, dict[str, Array]]:
        """Validate the condition-ID keys of every input.

        Args:
            inputs:
                The already-parsed ``inputs`` field: input ID -> condition-IDs
                string -> array.

        Returns:
            The unchanged ``inputs`` mapping.

        Raises:
            ValueError: If an input has no arrays at all, or if an input uses
                the "all conditions" key (``ALL_CONDITION_IDS``) together with
                any other condition-IDs key.
        """
        for input_id, input_data in inputs.items():
            if not input_data:
                raise ValueError(f"No input data supplied for input `{input_id}`.")

            # The "all conditions" key is only meaningful when it is the sole
            # entry for this input.
            n_arrays = len(input_data)
            if ALL_CONDITION_IDS in input_data and n_arrays != 1:
                raise ValueError(
                    "The condition IDs list is "
                    f"`{ALL_CONDITION_IDS}`, which indicates that the "
                    "array will be applied to all conditions. In this "
                    "case, exactly one array must be specified. "
                    f"However, {n_arrays} arrays were specified for "
                    f"input `{input_id}`."
                )
        return inputs
def extract_torch_parameters(torch_module: "torch.nn.Module", nn_model_id: str) -> dict:
    """Collect the parameters of a PyTorch module as NumPy arrays.

    Every entry of `named_parameters()` is split at its last dot into a layer
    ID and a parameter ID (e.g. `layer.weight` or `layer.bias`), and the
    converted arrays are grouped accordingly.

    Args:
        torch_module:
            A PyTorch module whose `named_parameters()` will be converted
            to NumPy arrays.
        nn_model_id:
            Neural network model ID, as defined in the PEtab-SciML YAML file.

    Returns:
        A nested dictionary compatible with ArrayData for exporting to
        PEtab-SciML HDF5-file format

    Raises:
        ValueError: If a non-empty parameter has a name without a dot, i.e.
            one that cannot be split into layer ID and parameter ID.
    """
    arrays_by_layer: dict = {}

    for name, tensor in torch_module.named_parameters():
        if tensor.numel() == 0:
            # Nothing to estimate for this entry, so it is not exported.
            continue

        try:
            layer_id, parameter_id = name.rsplit(".", maxsplit=1)
        except ValueError as exc:
            raise ValueError(
                f"Expected PyTorch parameter name of the form "
                f"'<layer_id>.<parameter_id>', got {name!r}."
            ) from exc

        converted = _to_numpy_array(tensor)
        if layer_id not in arrays_by_layer:
            arrays_by_layer[layer_id] = {}
        arrays_by_layer[layer_id][parameter_id] = converted

    return {
        METADATA: {"pytorch_format": True},
        PARAMETERS: {nn_model_id: arrays_by_layer},
    }
def extract_nn_yaml_parameters(yaml_file: str | Path) -> dict:
    """Extract parameters as NumPy arrays from a PEtab-SciML YAML file.

    The neural-network YAML file is loaded with `NNModelStandard`, turned into
    a PyTorch module, and the parameters of that freshly built module are
    extracted as NumPy arrays, ready to be exported to the PEtab-SciML HDF5
    array-file format.

    Args:
        yaml_file:
            Path to a PEtab-SciML neural-network YAML file.

    Returns:
        A nested dictionary compatible with ArrayData for exporting to
        PEtab-SciML HDF5-file format
    """
    loaded_model = NNModelStandard.load_data(yaml_file)
    return extract_torch_parameters(
        loaded_model.to_pytorch_module(),
        loaded_model.nn_model_id,
    )
def add_array_files_to_yaml(
    yaml_file: str | Path,
    array_files: str | Path | Iterable[str | Path],
    overwrite: bool = False,
) -> Path:
    """Add PEtab-SciML HDF5 array file(s) to a PEtab problem YAML file.

    The files are appended to ``extensions.petab_sciml.array_files``; missing
    intermediate keys are created. Entries already in the list are never
    removed.

    Args:
        yaml_file:
            Path to the PEtab problem YAML file to update in place.
        array_files:
            Array file path or array file paths to add to the YAML file.
            Files may be given as absolute paths or as paths relative to the
            current working directory. They are stored in the YAML file as
            POSIX-style paths relative to the directory containing
            ``yaml_file``. A file given more than once is added only once.
        overwrite:
            Controls what happens when a given file is already listed in
            ``extensions.petab_sciml.array_files``. If ``False``, a
            ``ValueError`` is raised and the YAML file is left untouched.
            If ``True``, the existing entry is kept and the file is skipped.

    Returns:
        Path: Path to the updated YAML file.

    Raises:
        ValueError: If ``overwrite`` is false and one of ``array_files`` is
            already listed in the YAML file.
    """
    yaml_file = Path(yaml_file)
    if isinstance(array_files, (str, Path)):
        array_files = [array_files]
    # Resolve first, then drop repeated inputs while keeping their order, so
    # that the same file cannot be appended twice in a single call.
    requested_files = list(
        dict.fromkeys(Path(array_file).resolve() for array_file in array_files)
    )

    yaml = YAML()
    yaml.preserve_quotes = True
    # Explicit encoding: do not depend on the platform default.
    with open(yaml_file, "r", encoding="utf-8") as f:
        data = yaml.load(f)

    yaml_dir = yaml_file.parent.resolve()

    extensions = data.setdefault("extensions", {})
    petab_sciml = extensions.setdefault("petab_sciml", {})
    existing_array_files = petab_sciml.setdefault("array_files", [])
    existing_array_file_paths = {
        # Handles both absolute and relative `array_file` correctly
        (yaml_dir / Path(array_file)).resolve()
        for array_file in existing_array_files
    }

    for array_file in requested_files:
        if array_file in existing_array_file_paths:
            if not overwrite:
                raise ValueError(
                    f"Array file {array_file.name!r} is already listed in "
                    "'extensions.petab_sciml.array_files'."
                )
            continue

        # NOTE: `walk_up` requires Python 3.12+.
        array_file_relative = array_file.relative_to(yaml_dir, walk_up=True).as_posix()
        existing_array_files.append(array_file_relative)

    with open(yaml_file, "w", encoding="utf-8") as f:
        yaml.dump(data, f)

    return yaml_file
def _to_numpy_array(array: ArrayLike) -> np.ndarray:
    """Convert supported array-like objects to data accepted by h5py.

    Objects exposing ``detach`` (e.g. PyTorch tensors) are detached, moved to
    the CPU when they offer ``cpu``, and converted via their ``numpy`` method.
    Everything else is handed to ``np.asarray``.

    Args:
        array:
            A tensor-like object with a ``detach`` method, or anything
            ``np.asarray`` accepts.

    Returns:
        The data as a NumPy array.
    """
    if hasattr(array, "detach"):
        array = array.detach()
        # `Tensor.numpy()` fails for tensors that are not on the CPU, so move
        # the data to host memory first (a no-op for CPU tensors).
        if hasattr(array, "cpu"):
            array = array.cpu()
        array = array.numpy()
    return np.asarray(array)


# mkstd HDF5 standard built from the `ArrayData` model.
ArrayDataStandard = Hdf5Standard(model=ArrayData)


if __name__ == "__main__":
    from pathlib import Path

    # Running this module as a script writes the JSON schema of the standard
    # to `doc/standard/array_data_schema.json` under `parents[4]` of this file.
    ArrayDataStandard.save_schema(
        Path(__file__).resolve().parents[4] / "doc" / "standard" / "array_data_schema.json"
    )