High-Level Postprocessing Utilities¤

These utilities assume you ran a study and now have a collection of results that need to be both melted (i.e., turned into a long format more suitable for visualization with seaborn or plotly) and concatenated.

Combines loss, metrics, and sample rollout postprocessing.

apebench.melt_concat_from_list ¤

melt_concat_from_list(
    raw_file_list: list[pathlib.Path],
    base_path: str,
    *,
    metric_name: Union[str, list[str]] = "mean_nRMSE",
    metric_file_name: str = "metrics",
    loss_file_name: str = "train_loss",
    sample_rollout_file_name: str = "sample_rollout",
    do_metrics: bool = True,
    do_loss: bool = False,
    do_sample_rollouts: bool = False
) -> tuple[
    Optional[pathlib.Path],
    Optional[pathlib.Path],
    Optional[pathlib.Path],
]

Melt and concatenate metrics, loss and sample rollouts from a list of raw files and save the resulting DataFrames to disk as CSV files. Use this function on the results of apebench.run_study.

Arguments:

raw_file_list: A list of paths to the raw data files.
base_path: The base path to store the results in.
metric_name: The name of the metric to melt.
metric_file_name: The name of the file to save the metrics to.
loss_file_name: The name of the file to save the loss to.
sample_rollout_file_name: The name of the file to save the sample rollouts to.
do_metrics: Whether to melt and save the metrics.
do_loss: Whether to melt and save the loss.
do_sample_rollouts: Whether to melt and save the sample rollouts.

Returns:

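metric_df_file_name: The path to the metrics file, or None if do_metrics is False.
loss_df_file_name: The path to the loss file, or None if do_loss is False.
sample_rollout_df_file_name: The path to the sample rollouts file, or None if do_sample_rollouts is False.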

Source code in apebench/_run.py

def melt_concat_from_list(
    raw_file_list: list[pathlib.Path],
    base_path: str,
    *,
    metric_name: Union[str, list[str]] = "mean_nRMSE",
    metric_file_name: str = "metrics",
    loss_file_name: str = "train_loss",
    sample_rollout_file_name: str = "sample_rollout",
    do_metrics: bool = True,
    do_loss: bool = False,
    do_sample_rollouts: bool = False,
) -> tuple[Optional[pathlib.Path], Optional[pathlib.Path], Optional[pathlib.Path]]:
    """
    Postprocess the raw files of a study in one go: for each enabled category
    (metrics, loss, sample rollouts) the corresponding `melt_concat_*_from_list`
    function is called on `raw_file_list` and the resulting DataFrame is
    written to `<base_path>/<file_name>.csv` (without the index column). Use
    this function on the results of [`apebench.run_study`][].

    **Arguments:**

    * `raw_file_list`: A list of paths to the raw data files.
    * `base_path`: The base path (directory) to store the CSV files in.
    * `metric_name`: The name, or list of names, of the metric(s) to melt. Only
        used if `do_metrics` is `True`.
    * `metric_file_name`: The file name (without the `.csv` suffix) to save the
        metrics to.
    * `loss_file_name`: The file name (without the `.csv` suffix) to save the
        loss to.
    * `sample_rollout_file_name`: The file name (without the `.csv` suffix) to
        save the sample rollouts to.
    * `do_metrics`: Whether to melt and save the metrics.
    * `do_loss`: Whether to melt and save the loss.
    * `do_sample_rollouts`: Whether to melt and save the sample rollouts.

    **Returns:**

    * `metric_df_file_name`: The path to the metrics file, or `None` if
        `do_metrics` is `False`.
    * `loss_df_file_name`: The path to the loss file, or `None` if `do_loss` is
        `False`.
    * `sample_rollout_df_file_name`: The path to the sample rollouts file, or
        `None` if `do_sample_rollouts` is `False`.
    """

    def _write_csv(frame, stem):
        # Build `<base_path>/<stem>.csv`, dump the frame there without its
        # index column, and hand the path back to the caller.
        destination = base_path / pathlib.Path(f"{stem}.csv")
        frame.to_csv(destination, index=False)
        return destination

    # Categories that are switched off report `None` instead of a path.
    metric_df_file_name = None
    loss_df_file_name = None
    sample_rollout_df_file_name = None

    if do_metrics:
        metric_df_file_name = _write_csv(
            melt_concat_metrics_from_list(raw_file_list, metric_name=metric_name),
            metric_file_name,
        )

    if do_loss:
        loss_df_file_name = _write_csv(
            melt_concat_loss_from_list(raw_file_list),
            loss_file_name,
        )

    if do_sample_rollouts:
        sample_rollout_df_file_name = _write_csv(
            melt_concat_sample_rollouts_from_list(raw_file_list),
            sample_rollout_file_name,
        )

    return metric_df_file_name, loss_df_file_name, sample_rollout_df_file_name

apebench.melt_concat_loss_from_list ¤

melt_concat_loss_from_list(
    raw_file_list: list[pathlib.Path],
) -> pd.DataFrame

Melt and concatenate loss from a list of raw files. Use this function on the results of apebench.run_study.

Arguments:

raw_file_list: A list of paths to the raw data files.

Returns:

loss_df: The DataFrame containing the loss.

Source code in apebench/_run.py

def melt_concat_loss_from_list(
    raw_file_list: list[pathlib.Path],
) -> pd.DataFrame:
    """
    Read each raw file as CSV, melt it with `melt_loss`, and concatenate the
    melted DataFrames in the order of `raw_file_list`. A progress bar is shown
    while the files are processed. Use this function on the results of
    [`apebench.run_study`][].

    **Arguments:**

    * `raw_file_list`: A list of paths to the raw data files.

    **Returns:**

    * `loss_df`: The DataFrame containing the loss.

    !!! note
        `pd.concat` raises a `ValueError` if `raw_file_list` is empty.
    """
    progress = tqdm(raw_file_list, desc="Melt and Concat loss")
    melted_frames = [melt_loss(pd.read_csv(raw_file)) for raw_file in progress]
    return pd.concat(melted_frames)

apebench.melt_concat_metrics_from_list ¤

melt_concat_metrics_from_list(
    raw_file_list: list[pathlib.Path],
    *,
    metric_name: Union[str, list[str]] = "mean_nRMSE"
) -> pd.DataFrame

Melt and concatenate metrics from a list of raw files. Use this function on the results of apebench.run_study.

Arguments:

raw_file_list: A list of paths to the raw data files.
metric_name: The name of the metric to melt.

Returns:

metric_df: The DataFrame containing the metrics.

Source code in apebench/_run.py

def melt_concat_metrics_from_list(
    raw_file_list: list[pathlib.Path],
    *,
    metric_name: Union[str, list[str]] = "mean_nRMSE",
) -> pd.DataFrame:
    """
    Read each raw file as CSV, melt it with `melt_metrics`, and concatenate the
    melted DataFrames in the order of `raw_file_list`. A progress bar is shown
    while the files are processed. Use this function on the results of
    [`apebench.run_study`][].

    **Arguments:**

    * `raw_file_list`: A list of paths to the raw data files.
    * `metric_name`: The name, or list of names, of the metric(s) to melt;
        forwarded unchanged to `melt_metrics`.

    **Returns:**

    * `metric_df`: The DataFrame containing the metrics.

    !!! note
        `pd.concat` raises a `ValueError` if `raw_file_list` is empty.
    """
    progress = tqdm(raw_file_list, desc="Melt and Concat metrics")
    melted_frames = [
        melt_metrics(pd.read_csv(raw_file), metric_name=metric_name)
        for raw_file in progress
    ]
    return pd.concat(melted_frames)

apebench.melt_concat_sample_rollouts_from_list ¤

melt_concat_sample_rollouts_from_list(
    raw_file_list: list[pathlib.Path],
) -> pd.DataFrame

Melt and concatenate sample rollouts from a list of raw files. Use this function on the results of apebench.run_study.

Arguments:

raw_file_list: A list of paths to the raw data files.

Returns:

sample_rollout_df: The DataFrame containing the sample rollouts.

Source code in apebench/_run.py

def melt_concat_sample_rollouts_from_list(
    raw_file_list: list[pathlib.Path],
) -> pd.DataFrame:
    """
    Read each raw file as CSV, melt it with `melt_sample_rollouts`, and
    concatenate the melted DataFrames in the order of `raw_file_list`. A
    progress bar is shown while the files are processed. Use this function on
    the results of [`apebench.run_study`][].

    **Arguments:**

    * `raw_file_list`: A list of paths to the raw data files.

    **Returns:**

    * `sample_rollout_df`: The DataFrame containing the sample rollouts.

    !!! note
        `pd.concat` raises a `ValueError` if `raw_file_list` is empty.
    """
    progress = tqdm(raw_file_list, desc="Melt and Concat sample rollouts")
    melted_frames = [
        melt_sample_rollouts(pd.read_csv(raw_file)) for raw_file in progress
    ]
    return pd.concat(melted_frames)