# Source code for pyvale.mooseherder.simloaderbyfield

# ==============================================================================
# pyvale: the python validation engine
# License: MIT
# Copyright (C) 2025 The Computer Aided Validation Team
# ==============================================================================

from pathlib import Path
from multiprocessing.pool import Pool
import numpy as np
import pandas as pd
from pyvale.mooseherder.outputloader import IOutputLoader
from pyvale.mooseherder.simdata import SimData, SimLoadConfig
from pyvale.mooseherder.simloadtools import (str_to_path,
                                          load_array,
                                          load_connectivity,
                                          load_field_files,
                                          check_sim_data_consistency,
                                          load_glob_vars,
                                          inv_group_dict)
from pyvale.mooseherder.simloadopts import SimLoadOpts
from pyvale.mooseherder.exceptions import SimLoadErr


class SimLoaderByField(IOutputLoader):
    """Class for loading simulation data (i.e. a `SimData` object) from a
    series of plain text delimited files or binary numpy npy files. Implements
    the `IOutputLoader` interface.

    Coordinates, time steps and connectivity tables are read from disk once
    when the loader is constructed. Global variables and nodal field variables
    are read from disk every time `load_sim_data` is called.
    """

    # These must match the attributes assigned in `__init__`.
    __slots__ = ("_load_dir", "_coords", "_time_steps", "_connect",
                 "_glob_file", "_glob_slices", "_load_opts",
                 "_node_file_pattern", "_node_slices")

    def __init__(self,
                 load_dir: Path,
                 coords_file: str | Path | None,
                 time_step_file: str | Path | None,
                 node_field_files: dict[str,str] | None,
                 connect_files: str | list[str] | None = None,
                 glob_file: str | None = None,
                 glob_slices: dict[str,slice] | None = None,
                 load_opts: SimLoadOpts | None = None) -> None:
        """
        Parameters
        ----------
        load_dir : Path
            Directory to load the simulation data files from.
        coords_file : str | Path | None
            String or full path specifying the coordinates file. If None then
            no coordinates are loaded and they can be manually specified in
            the SimData object.
        time_step_file : str | Path | None
            String or full path to the file containing the simulation time
            steps. If None then no time step file is loaded and the time steps
            can be manually specified in the SimData object.
        node_field_files : dict[str,str] | None
            Dictionary keyed by the node field variable name where the value
            is the file name for that field variable to be found in the load
            directory. If None then no nodal field variables are loaded.
        connect_files : str | list[str] | None, optional
            Wildcard pattern specifying how to identify connectivity files in
            the load directory or list of strings for the connectivity files,
            by default None. If None then no connectivity tables are loaded.
        glob_file : str | None, optional
            File name for the global variables file in the load directory, by
            default None. If None then global variables are not loaded.
        glob_slices : dict[str,slice] | None, optional
            Dictionary keyed with the global variable names with slices
            specifying which columns to extract for the given global variable,
            by default None. If None then no global variables are loaded.
        load_opts : SimLoadOpts | None, optional
            Options for loading the simulation data including the number of
            threads for using multi-processing to load field files, by default
            None. If None then a default load options dataclass is created.

        Raises
        ------
        SimLoadErr
            The specified load directory is not a directory.
        """
        # Accept a plain string for the directory as well as a Path.
        load_dir = Path(load_dir)

        # Honour the documented contract: None means default load options.
        if load_opts is None:
            load_opts = SimLoadOpts()

        self._load_dir = load_dir
        self._glob_file = glob_file
        self._glob_slices = glob_slices
        self._load_opts = load_opts

        self._coords = None
        self._time_steps = None
        self._connect = None
        self._node_file_pattern = None
        self._node_slices = None

        if not load_dir.is_dir():
            raise SimLoadErr(f"Load directory: {load_dir.resolve()}, is not a "
                             + "directory.")

        if coords_file is not None:
            coords_path = str_to_path(load_dir,coords_file)
            self._coords = load_array(coords_path,
                                      load_opts.coord_header,
                                      load_opts.delimiter)

        if time_step_file is not None:
            time_step_path = str_to_path(load_dir,time_step_file)
            self._time_steps = load_array(time_step_path,
                                          load_opts.time_header,
                                          load_opts.delimiter)

            # Fix for column of nans from reading a 1 column csv
            if self._time_steps.ndim != 1:
                self._time_steps = self._time_steps[:,0]

        if connect_files is not None:
            self._connect = load_connectivity(load_dir,
                                              connect_files,
                                              load_opts)

        if node_field_files is not None:
            # We are loading by field so only need empty slices
            self._node_slices = {kk: slice(None) for kk in node_field_files}

            # We invert the keys and values of this dictionary grouping
            # duplicate keys as values - that way we can loop over this and use
            # the value lists to index into the slices opening a file with a
            # given pattern a single time.
            self._node_file_pattern = inv_group_dict(node_field_files)

    # NOTE: interface function
    def load_sim_data(self, load_config: SimLoadConfig) -> SimData:
        """Loads the simulation data object based on the specified config.

        Parameters
        ----------
        load_config : SimLoadConfig
            Configuration specifying which parts of the SimData object to
            load.

        Returns
        -------
        SimData
            The SimData object assembled from loading files from disk.
        """
        #-----------------------------------------------------------------------
        # 1. Create SimData object to populate with the data cached at
        # construction time
        sim_data = SimData()

        if load_config.coords:
            sim_data.coords = self._coords

        if load_config.time:
            sim_data.time = self._time_steps

        if load_config.connect:
            sim_data.connect = self._connect

        #-----------------------------------------------------------------------
        # 2. Load global variables file, both the file and the slices are
        # required
        if self._glob_file is not None and self._glob_slices is not None:
            sim_data.glob_vars = load_glob_vars(self._load_dir/self._glob_file,
                                                self._glob_slices,
                                                self._load_opts)

        #-----------------------------------------------------------------------
        # 3. Load node field variables by field
        if self._node_file_pattern is not None:
            node_vars = {}

            for file_pattern,field_keys in self._node_file_pattern.items():
                # Only the slices for the fields stored under this pattern
                slices_to_ext = {kk: self._node_slices[kk]
                                 for kk in field_keys}

                this_node_vars = load_field_files(
                    self._load_dir,
                    file_pattern,
                    slices_to_ext,
                    self._load_opts.node_field_header,
                    self._load_opts.delimiter,
                    load_config.time_inds,
                    self._load_opts.workers,
                )
                node_vars.update(this_node_vars)

            # Needed to get around extra axis issue for components in load func
            # NOTE(review): squeeze without an axis removes every length-1
            # axis, presumably also a single node or single time step axis -
            # confirm against the array shapes from `load_field_files`.
            sim_data.node_vars = {nn: np.squeeze(vv)
                                  for nn,vv in node_vars.items()}

        check_sim_data_consistency(sim_data)

        return sim_data

    # NOTE: interface function
    def load_all_sim_data(self) -> SimData:
        """Loads all simulation data into a SimData object.

        Returns
        -------
        SimData
            The SimData object assembled from loading files from disk.
        """
        # Default load config reads all available data
        load_config = SimLoadConfig()
        return self.load_sim_data(load_config)