dataset ¶

Classes:

EQDataset –

Dataset for equilibrium equation solutions.

Functions:

read_and_interpolate_solutions –

Read a pickle file containing ODE solutions and interpolate them at given radial points.

EQDataset ¶

EQDataset(data_path: Path, radial_res: int = 100, uniform_sampling: bool = False, rfp_only: bool = False, keep_rfp_flag: bool = False)

Dataset for equilibrium equation solutions.

Parameters:

data_path ¶
(Path) –

Path to the directory containing the dataset .pkl files.
radial_res ¶
(int, default: 100 ) –

Number of radial points to interpolate the solutions at. Each equilibrium will be represented on this radial grid, by default 100.
uniform_sampling ¶
(bool, default: False ) –

If True, sample the radial points uniformly at random in [RMIN/2, RMAX]. If False, use a fixed linearly spaced grid in the same interval, by default False.
rfp_only ¶
(bool, default: False ) –

If True, restrict the dataset to rows corresponding to RFP equilibria only (rfp_flag == 1.0). If False, keep all rows, by default False.
keep_rfp_flag ¶
(bool, default: False ) –

If True, keep the rfp_flag column as the last feature in the internal data array. This is mainly intended for debugging, since models do not use this flag as input. If False, the column is removed, by default False.

Methods:

__getitem__ –

Get the radial grid, input parameters, and solution profile for a given equilibrium (set of parameters).
__len__ –

Return the length of the dataset.
get_data –

Get the dataset as input–target pairs.
inverse_transform_inputs –

Inverse transform the input features from model space back to the original physical space.
transform_inputs –

Transform the input features to be fed to the model.

Source code in src/fpga_profile_reco/data/dataset.py

def __init__(self, data_path: Path, radial_res: int = 100, uniform_sampling: bool = False, rfp_only: bool = False, keep_rfp_flag: bool = False):
    """
    Load every ``.pkl`` batch in ``data_path`` and tabulate it on a radial grid.

    Each file is read and interpolated in a worker process by
    :func:`read_and_interpolate_solutions`; the per-file tables are then
    stacked row-wise into a single 2-D array stored in ``self._data``.

    Parameters
    ----------
    data_path : pathlib.Path
        Path to the directory containing the dataset ``.pkl`` files.
    radial_res : int, optional
        Number of radial points to interpolate the solutions at. Each
        equilibrium will be represented on this radial grid, by default 100.
    uniform_sampling : bool, optional
        If True, sample the radial points uniformly at random in
        ``[RMIN/2, RMAX]`` (seeded with ``cfg.SEED``; the same points are
        used for every equilibrium). If False, use a fixed linearly spaced
        grid in the same interval, by default False.
    rfp_only : bool, optional
        If True, restrict the dataset to rows corresponding to RFP
        equilibria only (``rfp_flag == 1.0``). If False, keep all rows,
        by default False.
    keep_rfp_flag : bool, optional
        If True, keep the ``rfp_flag`` column as the last feature in the
        internal data array. This is mainly intended for debugging, since
        models do not use this flag as input. If False, the column is
        removed, by default False.

    Raises
    ------
    FileNotFoundError
        If ``data_path`` contains no ``.pkl`` files.
    """
    self.radial_res = radial_res
    # Sort so the row order of the dataset does not depend on the
    # directory-listing order of the underlying filesystem.
    self.data_files = sorted(data_path.glob("*.pkl"))
    if not self.data_files:
        # Fail early with a clear message instead of an IndexError on an
        # empty 1-D array further down.
        raise FileNotFoundError(f"No .pkl files found in {data_path}")

    if uniform_sampling:
        rng = np.random.default_rng(seed=cfg.SEED)
        r = rng.uniform(low=eqs.RMIN / 2, high=eqs.RMAX, size=self.radial_res)
    else:
        r = np.linspace(eqs.RMIN / 2, eqs.RMAX, self.radial_res)

    # Reading + interpolating is CPU bound, so fan out over processes.
    # Cap the pool at 25 workers and never exceed the available cores;
    # os.cpu_count() may return None, in which case fall back to one worker.
    n_workers = min(os.cpu_count() or 1, 25)
    with ProcessPoolExecutor(max_workers=n_workers) as executor:
        results = list(tqdm(executor.map(read_and_interpolate_solutions, self.data_files, [r] * len(self.data_files)),
                            total=len(self.data_files), desc="Loading and interpolating...", unit="files"))

    # Each result is already a 2-D (rows, features) array, so stack them
    # directly instead of going through a Python list of per-row arrays.
    self._data = np.concatenate(results, axis=0)

    if rfp_only:
        self._data = self._data[self._data[:, -1] == 1.0]  # filter rows where rfp_flag is 1.0

    # remove the rfp_flag column from the dataset (last column)
    if not keep_rfp_flag:
        self._data = self._data[:, :-1]
    print(f"Dataset shape: {self._data.shape}")

__getitem__ ¶

__getitem__(idx: int) -> Tuple[ndarray, ndarray, ndarray]

Get the radial grid, input parameters, and solution profile for a given equilibrium (set of parameters).

The dataset is internally stored as a flattened array where each equilibrium corresponds to radial_res consecutive rows.

Parameters:

idx ¶
(int) –

Index of the equilibrium to retrieve in the range [0, len(self) // radial_res).

Returns:

r ( ndarray ) –

One-dimensional array of shape (radial_res,) containing the radial grid points.
params ( ndarray ) –

One-dimensional array of shape (4,) containing the input parameters [alpha, theta_0, delta_h, delta_a] for this equilibrium.
solution ( ndarray ) –

Two-dimensional array of shape (radial_res, n_solution_vars) containing the solution variables evaluated on the radial grid.

Source code in src/fpga_profile_reco/data/dataset.py

def __getitem__(self, idx: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Get the radial grid, input parameters, and solution profile for a given
    equilibrium (set of parameters).

    The dataset is internally stored as a flattened array where each
    equilibrium corresponds to ``radial_res`` consecutive rows.

    Parameters
    ----------
    idx : int
        Index of the equilibrium to retrieve in the range
        ``[0, len(self) // radial_res)``. Negative values count from the
        end, as for Python sequences.

    Returns
    -------
    r : numpy.ndarray
        One-dimensional array of shape ``(radial_res,)`` containing the
        radial grid points.
    params : numpy.ndarray
        One-dimensional array of shape ``(4,)`` containing the input
        parameters ``[alpha, theta_0, delta_h, delta_a]`` for this
        equilibrium.
    solution : numpy.ndarray
        Two-dimensional array of shape ``(radial_res, n_solution_vars)``
        containing the solution variables evaluated on the radial grid.

    Raises
    ------
    IndexError
        If ``idx`` does not address a complete equilibrium block.
    """
    n_equilibria = len(self._data) // self.radial_res

    # Resolve negative indices up front: a raw negative row slice such as
    # ``[-radial_res:0]`` is empty and would silently return no data.
    position = idx + n_equilibria if idx < 0 else idx
    if not 0 <= position < n_equilibria:
        raise IndexError(
            f"equilibrium index {idx} is out of range for a dataset with "
            f"{n_equilibria} equilibria"
        )

    start_idx = position * self.radial_res
    end_idx = start_idx + self.radial_res

    r = self._data[start_idx:end_idx, 0]  # shape (radial_res,)
    # Parameters are constant within a block, so the first row is enough.
    params = self._data[start_idx, 1:5]  # shape (4,)
    solution = self._data[start_idx:end_idx, 5:]  # shape (radial_res, n_solution_vars)

    return r, params, solution

__len__ ¶

__len__()

Return the length of the dataset.

Returns:

int –

Number of rows in the internal data array. This is equal to n_equilibria * radial_res.

Source code in src/fpga_profile_reco/data/dataset.py

def __len__(self):
    """
    Report how many rows the internal data array holds.

    Note that this counts rows, not equilibria: every equilibrium occupies
    ``radial_res`` consecutive rows.

    Returns
    -------
    int
        Row count of the internal data array, i.e.
        ``n_equilibria * radial_res``.
    """
    row_count = len(self._data)
    return row_count

get_data ¶

get_data(scale_data: bool = True) -> Tuple[ndarray, ndarray]

Get the dataset as input–target pairs.

Parameters:

scale_data ¶
(bool, default: True ) –

If True, return inputs with the first four features normalized using `transform_inputs`. If False, return raw inputs. Defaults to True.

Returns:

inputs ( ndarray ) –

Array of shape (n_samples, 5) containing the input features [r, alpha, theta_0, delta_h, delta_a].
targets ( ndarray ) –

Array of shape (n_samples, n_solution_vars) containing the solution variables at the corresponding radial points.

Source code in src/fpga_profile_reco/data/dataset.py

def get_data(self, scale_data: bool = True) -> Tuple[np.ndarray, np.ndarray]:
    """
    Split the stored table into model inputs and regression targets.

    Parameters
    ----------
    scale_data : bool, optional
        When True (the default) the table is copied and passed through
        :meth:`transform_inputs` before splitting, so the stored data stays
        untouched. When False the raw table is split as-is and the returned
        arrays are slices of the internal array.

    Returns
    -------
    inputs : numpy.ndarray
        Array of shape ``(n_samples, 5)`` holding the input features
        ``[r, alpha, theta_0, delta_h, delta_a]``.
    targets : numpy.ndarray
        Array of shape ``(n_samples, n_solution_vars)`` holding every
        remaining column, i.e. the solution variables at the corresponding
        radial points.
    """
    n_inputs = 5  # r, alpha, theta_0, delta_h, delta_a

    if not scale_data:
        table = self._data
    else:
        # transform_inputs works in place, so hand it a private copy.
        table = self.transform_inputs(self._data.copy())

    return table[:, :n_inputs], table[:, n_inputs:]

inverse_transform_inputs `staticmethod` ¶

inverse_transform_inputs(data: ndarray) -> ndarray

Inverse transform the input features from model space back to the original physical space.

This method performs an in-place inverse normalization of the first four columns of data corresponding to [r, alpha, theta_0, delta_h].

Parameters:

data ¶
(ndarray) –

Transformed input data of shape (n_samples, n_features) in model (normalized) space. It is modified in place and also returned.

Returns:

ndarray –

The same array data after inverse transformation.

Source code in src/fpga_profile_reco/data/dataset.py

@staticmethod
def inverse_transform_inputs(data: np.ndarray) -> np.ndarray:
    """
    Map normalized input features back to their physical ranges.

    Columns 0-3 of ``data`` (``[r, alpha, theta_0, delta_h]``) are rescaled
    in place from ``[0, 1]`` to ``[lo, hi]`` via ``x * (hi - lo) + lo``,
    where the bounds come from the ``eqs`` constants. All other columns are
    left untouched.

    Parameters
    ----------
    data : numpy.ndarray
        Array of shape ``(n_samples, n_features)`` in model (normalized)
        space. It is overwritten in place.

    Returns
    -------
    numpy.ndarray
        The same ``data`` object, now in physical units.
    """
    # (lower, upper) physical bound for each of the first four columns.
    physical_ranges = (
        (eqs.RMIN / 2, eqs.RMAX),                  # r
        (eqs.ALPHA_MIN, eqs.ALPHA_MAX),            # alpha
        (eqs.THETA_0_MIN, eqs.THETA_0_MAX),        # theta_0
        (eqs.DELTA_H_MIN, eqs.DELTA_H_MAX),        # delta_h
    )

    for column, (lower, upper) in enumerate(physical_ranges):
        data[:, column] = data[:, column] * (upper - lower) + lower

    return data

transform_inputs `staticmethod` ¶

transform_inputs(data: ndarray) -> ndarray

Transform the input features to be fed to the model.

This method performs an in-place normalization of the first four columns of data corresponding to [r, alpha, theta_0, delta_h].

Parameters:

data ¶
(ndarray) –

Raw input data of shape (n_samples, n_features). It is modified in place and also returned.

Returns:

ndarray –

The same array data after transformation.

Source code in src/fpga_profile_reco/data/dataset.py

@staticmethod
def transform_inputs(data: np.ndarray) -> np.ndarray:
    """
    Normalize the input features to the unit interval for the model.

    Columns 0-3 of ``data`` (``[r, alpha, theta_0, delta_h]``) are rescaled
    in place from ``[lo, hi]`` to ``[0, 1]`` via ``(x - lo) / (hi - lo)``,
    where the bounds come from the ``eqs`` constants. All other columns are
    left untouched.

    Parameters
    ----------
    data : numpy.ndarray
        Raw array of shape ``(n_samples, n_features)``. It is overwritten
        in place.

    Returns
    -------
    numpy.ndarray
        The same ``data`` object, now in normalized space.
    """
    # (lower, upper) physical bound for each of the first four columns.
    physical_ranges = (
        (eqs.RMIN / 2, eqs.RMAX),                  # r
        (eqs.ALPHA_MIN, eqs.ALPHA_MAX),            # alpha
        (eqs.THETA_0_MIN, eqs.THETA_0_MAX),        # theta_0
        (eqs.DELTA_H_MIN, eqs.DELTA_H_MAX),        # delta_h
    )

    for column, (lower, upper) in enumerate(physical_ranges):
        data[:, column] = (data[:, column] - lower) / (upper - lower)

    return data

read_and_interpolate_solutions ¶

read_and_interpolate_solutions(file: Path, r: ndarray) -> ndarray

Read a pickle file containing ODE solutions and interpolate them at given radial points.

Parameters:

file ¶
(Path) –

Path to the pickle file containing a batch of samples.
r ¶
(ndarray) –

One-dimensional array of radial points at which to interpolate the solutions, of shape (radial_res,).

Returns:

ndarray –

Interpolated data array of shape (n_samples * radial_res, n_features) for this file, where each block of radial_res rows corresponds to one equilibrium.

Source code in src/fpga_profile_reco/data/dataset.py

def read_and_interpolate_solutions(file: Path, r: np.ndarray) -> np.ndarray:
    """
    Load one batch file and tabulate every stored solution on the grid ``r``.

    Each sample in the pickled batch is expected to be a mapping with the
    keys ``sol``, ``alpha``, ``theta_0``, ``delta_h``, ``delta_a`` and
    ``rfp_flag``. For every sample a table with columns
    ``[r, alpha, theta_0, delta_h, delta_a, <solution vars...>, rfp_flag]``
    is built, and the tables are stacked in file order.

    Parameters
    ----------
    file : pathlib.Path
        Path to the pickle file containing a batch of samples.
    r : numpy.ndarray
        One-dimensional array of shape ``(radial_res,)`` with the radial
        points at which the solutions are evaluated.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples * radial_res, n_features)`` in which
        each run of ``radial_res`` consecutive rows belongs to one
        equilibrium.
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only use this on dataset files from a trusted source.
    with open(file, "rb") as handle:
        samples = pickle.load(handle)

    # Quantities that are identical for every sample in the batch.
    n_points = len(r)
    r_column = r.reshape(-1, 1)
    scalar_keys = ("alpha", "theta_0", "delta_h", "delta_a")

    per_sample_tables = []
    for sample in samples:
        sol: OdeSolution = sample["sol"]

        # Prefer the nested dense-output interpolant when the object
        # carries one (faster); otherwise call the object itself.
        dense = getattr(sol, "sol", None)
        interpolant = dense if dense is not None else sol
        profile = interpolant(r).T  # shape (radial_res, n_vars)

        # Scalars are broadcast to one constant column each.
        columns = [r_column]
        columns.extend(np.full((n_points, 1), sample[key]) for key in scalar_keys)
        columns.append(profile)
        columns.append(np.full((n_points, 1), sample["rfp_flag"]))

        per_sample_tables.append(np.hstack(columns))

    return np.vstack(per_sample_tables)

dataset ¶

EQDataset ¶

`data_path` ¶

`radial_res` ¶

`uniform_sampling` ¶

`rfp_only` ¶

`keep_rfp_flag` ¶

getitem ¶

`idx` ¶

len ¶

get_data ¶

`scale_data` ¶

inverse_transform_inputs `staticmethod` ¶

`data` ¶

transform_inputs `staticmethod` ¶

`data` ¶

read_and_interpolate_solutions ¶

`file` ¶

`r` ¶

dataset ¶

EQDataset ¶

data_path ¶

radial_res ¶

uniform_sampling ¶

rfp_only ¶

keep_rfp_flag ¶

__getitem__ ¶

idx ¶

__len__ ¶

get_data ¶

scale_data ¶

inverse_transform_inputs staticmethod ¶

data ¶

transform_inputs staticmethod ¶

data ¶

read_and_interpolate_solutions ¶

file ¶

r ¶

`data_path` ¶

`radial_res` ¶

`uniform_sampling` ¶

`rfp_only` ¶

`keep_rfp_flag` ¶

getitem ¶

`idx` ¶

len ¶

`scale_data` ¶

inverse_transform_inputs `staticmethod` ¶

`data` ¶

transform_inputs `staticmethod` ¶

`data` ¶

`file` ¶

`r` ¶