Dataframe Manipulation

apebench.read_in_kwargs

read_in_kwargs(df: pd.DataFrame) -> pd.DataFrame

Parse the scenario_kwargs column of a DataFrame and add the parsed entries as new columns.

Requires that the dictionary in scenario_kwargs has the same keys for all rows.

Source code in apebench/_utils.py
def read_in_kwargs(
    df: pd.DataFrame,
) -> pd.DataFrame:
    """
    Parse the `scenario_kwargs` column of a DataFrame and add the parsed entries
    as new columns.

    Requires that the dictionary in `scenario_kwargs` has the same keys for all
    rows.
    """
    col = df["scenario_kwargs"].apply(eval)
    entries = list(col[0].keys())
    for entry in entries:
        df[entry] = col.apply(lambda x: x[entry])
    return df
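
A minimal usage sketch; the frame below is hypothetical (the scenario name and the advection_gamma key are made up for illustration), but it mimics the stringified dictionaries APEBench stores in the scenario_kwargs column:

import pandas as pd

import apebench

# Hypothetical result frame; every row carries the same dict keys
df = pd.DataFrame(
    {
        "scenario": ["diff_adv", "diff_adv"],
        "scenario_kwargs": [
            "{'advection_gamma': 2.5}",
            "{'advection_gamma': 10.5}",
        ],
    }
)

df = apebench.read_in_kwargs(df)
# df now has a new column "advection_gamma" holding 2.5 and 10.5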

apebench.split_train

split_train(metric_data: pd.DataFrame) -> pd.DataFrame

Decode the train column into category, type, and rollout columns.

Source code in apebench/_utils.py
def split_train(
    metric_data: pd.DataFrame,
) -> pd.DataFrame:
    """
    Decode the `train` column into `category`, `type`, and `rollout` columns.
    """
    metric_data["category"] = metric_data["train"].apply(lambda x: x.split(";")[0])
    metric_data["type"] = metric_data["category"].apply(
        lambda x: "sup" if x in ["one", "sup"] else "div"
    )
    metric_data["rollout"] = metric_data["train"].apply(
        lambda x: int(x.split(";")[1]) if x != "one" else 1
    )

    return metric_data
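
A short sketch of the decoding; the "category;rollout" string format is implied by the parsing above, and "one", "sup", and "div" are the categories the function distinguishes:

import pandas as pd

import apebench

metric_data = pd.DataFrame(
    {
        "train": ["one", "sup;5", "div;2"],
        "mean_nRMSE": [0.10, 0.08, 0.12],
    }
)

metric_data = apebench.split_train(metric_data)
# category: "one", "sup", "div"
# type:     "sup", "sup", "div"   ("one" counts as supervised)
# rollout:  1,     5,     2       ("one" defaults to a rollout of 1)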

apebench.aggregate_gmean

aggregate_gmean(
    metric_data: pd.DataFrame,
    *,
    up_to: int = 100,
    grouping_cols: list[str] = BASE_NAMES
) -> pd.DataFrame

Aggregate an error rollout over time via the geometric mean.

Args:

  • metric_data: The DataFrame to aggregate; must contain grouping_cols and mean_nRMSE as columns. When grouped by grouping_cols, each group must contain at most one row per time step.
  • up_to: The time step up to which to aggregate (inclusive).
  • grouping_cols: The columns to group by.

Returns:

  • A DataFrame with the new column gmean_mean_nRMSE containing the geometric mean of the mean_nRMSE values up to up_to for each group.
Source code in apebench/_utils.py
def aggregate_gmean(
    metric_data: pd.DataFrame,
    *,
    up_to: int = 100,
    grouping_cols: list[str] = BASE_NAMES,
) -> pd.DataFrame:
    """
    Aggregate an error rollout over time via the geometric mean.

    Args:

    * `metric_data`: The DataFrame to aggregate, must contain `grouping_cols`
        and `mean_nRMSE` as columns. When grouped by `grouping_cols`, the groups
        shall only contain values at different time steps.
    * `up_to`: The time step up to which to aggregate. (inclusive)
    * `grouping_cols`: The columns to group by.

    Returns:

    * A DataFrame with the new column `gmean_mean_nRMSE` containing the
        geometric mean of the `mean_nRMSE` values up to `up_to` for each group.
    """
    return (
        metric_data.query(f"time_step <= {up_to}")
        .groupby(grouping_cols)
        .agg(gmean_mean_nRMSE=("mean_nRMSE", gmean))
        .reset_index()
    )
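
For instance, with a single hypothetical group (the "net" column and its value are made up; in practice grouping_cols defaults to BASE_NAMES), the geometric mean of 0.01, 0.1, and 1.0 is exactly 0.1:

import pandas as pd

import apebench

metric_data = pd.DataFrame(
    {
        "net": ["conv", "conv", "conv"],
        "time_step": [1, 2, 3],
        "mean_nRMSE": [0.01, 0.1, 1.0],
    }
)

agg = apebench.aggregate_gmean(metric_data, up_to=3, grouping_cols=["net"])
# agg["gmean_mean_nRMSE"] is 0.1 for the single "conv" group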

apebench.relative_by_config

relative_by_config(
    data: pd.DataFrame,
    *,
    grouping_cols: list[str] = BASE_NAMES_NO_TRAIN,
    norm_query: str = "train == 'one'",
    value_col: str = "mean_nRMSE",
    suffix: str = "_rel"
) -> pd.DataFrame

Within each group defined by grouping_cols, divide value_col by its value in the single row matching norm_query and store the result in a new column named value_col plus suffix (with the defaults: mean_nRMSE_rel).

Source code in apebench/_utils.py
def relative_by_config(
    data: pd.DataFrame,
    *,
    grouping_cols: list[str] = BASE_NAMES_NO_TRAIN,
    norm_query: str = "train == 'one'",
    value_col: str = "mean_nRMSE",
    suffix: str = "_rel",
) -> pd.DataFrame:
    def relativate_fn(sub_df):
        rel = sub_df.query(norm_query)[value_col]
        if len(rel) != 1:
            raise ValueError(
                f"Expected exactly one row to match {norm_query}, got {len(rel)}"
            )
        col = sub_df[value_col] / rel.values[0]
        sub_df[f"{value_col}{suffix}"] = col
        return sub_df

    return data.groupby(grouping_cols).apply(relativate_fn).reset_index(drop=True)
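
A minimal sketch, assuming a single group with exactly one baseline row matching the default norm_query of train == 'one' (the "scenario" column and its value are illustrative):

import pandas as pd

import apebench

data = pd.DataFrame(
    {
        "scenario": ["diff_adv", "diff_adv", "diff_adv"],
        "train": ["one", "sup;5", "div;2"],
        "mean_nRMSE": [0.10, 0.08, 0.12],
    }
)

rel = apebench.relative_by_config(data, grouping_cols=["scenario"])
# rel["mean_nRMSE_rel"] is 1.0, 0.8, 1.2 (each row divided by the 'one' row)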