# Source code for jmetal.lab.statistical_test.apv_procedures

import numpy as np
import pandas as pd



[docs]
def bonferroni_dunn(p_values, control):
    """
    Bonferroni-Dunn's procedure for the adjusted p-value computation.

    The first row of ``p_values`` is sorted in ascending order and the ``k - 1``
    smallest entries are each multiplied by ``k - 1`` (``k`` being the number of
    columns) and capped at 1. The largest entry is left out.

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame containing the p-values obtained from a ranking test.
        Only the first row is used.
    control: int or string. Index or Name of the control algorithm.

    Returns:
    --------
    APVs: DataFrame containing the adjusted p-values (single column "Bonferroni"),
        indexed by "<control> vs <algorithm>" in ascending p-value order.

    Raises:
    -------
    ValueError: if ``p_values`` is not 2-D, or if ``control`` is None or is a name
        that does not identify exactly one algorithm.
    """

    # Initial Checking
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array(["Alg%d" % alg for alg in range(p_values.shape[1])])

    if isinstance(control, str):
        # Resolve the name explicitly instead of calling int() on an index
        # array, so an unknown/ambiguous name gives a meaningful error.
        matches = np.flatnonzero(algorithms == control)
        if matches.size != 1:
            raise ValueError("Initialization ERROR. Incorrect value for control.")
        control = int(matches[0])
    if control is None:
        raise ValueError("Initialization ERROR. Incorrect value for control.")

    k = p_values.shape[1]
    n_comparisons = max(k - 1, 0)

    # sort p-values p(0) <= p(1) <= ... <= p(k-1) and drop the largest one
    order = np.argsort(p_values[0, :])[:n_comparisons]
    sorted_pvals = p_values[0, order].astype(float)

    control_name = str(algorithms[control])
    comparison = [control_name + " vs " + str(algorithms[j]) for j in order]
    apvs = np.minimum((k - 1) * sorted_pvals, 1)
    return pd.DataFrame(data=apvs[:, np.newaxis], index=comparison, columns=["Bonferroni"])




[docs]
def holland(p_values, control):
    """
    Holland's procedure for the adjusted p-value computation.

    With the first row of ``p_values`` sorted ascending as p(0) <= ... <= p(k-1),
    the i-th adjusted value is ``min(1, max_{j <= i} 1 - (1 - p(j)) ** (k - 1 - j))``
    for i = 0 .. k-2. The largest entry is left out.

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame containing the p-values obtained from a ranking test.
        Only the first row is used.
    control: int or string. Index or Name of the control algorithm.

    Returns:
    --------
    APVs: DataFrame containing the adjusted p-values (single column "Holland"),
        indexed by "<control> vs <algorithm>" in ascending p-value order.

    Raises:
    -------
    ValueError: if ``p_values`` is not 2-D, or if ``control`` is None or is a name
        that does not identify exactly one algorithm.
    """

    # Initial Checking
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array(["Alg%d" % alg for alg in range(p_values.shape[1])])

    if isinstance(control, str):
        # Resolve the name explicitly instead of calling int() on an index
        # array, so an unknown/ambiguous name gives a meaningful error.
        matches = np.flatnonzero(algorithms == control)
        if matches.size != 1:
            raise ValueError("Initialization ERROR. Incorrect value for control.")
        control = int(matches[0])
    if control is None:
        raise ValueError("Initialization ERROR. Incorrect value for control.")

    # --------------------------------------------------------------------------
    # ------------------------------- Procedure --------------------------------
    # --------------------------------------------------------------------------
    k = p_values.shape[1]
    n_comparisons = max(k - 1, 0)

    # sort p-values p(0) <= p(1) <= ... <= p(k-1) and drop the largest one
    order = np.argsort(p_values[0, :])[:n_comparisons]
    sorted_pvals = p_values[0, order].astype(float)

    control_name = str(algorithms[control])
    comparison = [control_name + " vs " + str(algorithms[j]) for j in order]

    # Exponent for position j is k - 1 - j; a running maximum replaces the
    # per-row recomputation of the prefix maximum.
    exponents = k - 1 - np.arange(n_comparisons)
    terms = 1 - (1 - sorted_pvals) ** exponents
    apvs = np.minimum(np.maximum.accumulate(terms), 1)
    return pd.DataFrame(data=apvs[:, np.newaxis], index=comparison, columns=["Holland"])




[docs]
def finner(p_values, control):
    """
    Finner's procedure for the adjusted p-value computation.

    With the first row of ``p_values`` sorted ascending as p(0) <= ... <= p(k-1),
    the i-th adjusted value is
    ``min(1, max_{j <= i} 1 - (1 - p(j)) ** ((k - 1) / (j + 1)))`` for i = 0 .. k-2.
    The largest entry is left out.

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame containing the p-values obtained from a ranking test.
        Only the first row is used.
    control: int or string. Index or Name of the control algorithm.

    Returns:
    --------
    APVs: DataFrame containing the adjusted p-values (single column "Finner"),
        indexed by "<control> vs <algorithm>" in ascending p-value order.

    Raises:
    -------
    ValueError: if ``p_values`` is not 2-D, or if ``control`` is None or is a name
        that does not identify exactly one algorithm.
    """

    # Initial Checking
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array(["Alg%d" % alg for alg in range(p_values.shape[1])])

    if isinstance(control, str):
        # Resolve the name explicitly instead of calling int() on an index
        # array, so an unknown/ambiguous name gives a meaningful error.
        matches = np.flatnonzero(algorithms == control)
        if matches.size != 1:
            raise ValueError("Initialization ERROR. Incorrect value for control.")
        control = int(matches[0])
    if control is None:
        raise ValueError("Initialization ERROR. Incorrect value for control.")

    k = p_values.shape[1]
    n_comparisons = max(k - 1, 0)

    # sort p-values p(0) <= p(1) <= ... <= p(k-1) and drop the largest one
    order = np.argsort(p_values[0, :])[:n_comparisons]
    sorted_pvals = p_values[0, order].astype(float)

    control_name = str(algorithms[control])
    comparison = [control_name + " vs " + str(algorithms[j]) for j in order]

    # Exponent for position j is (k - 1) / (j + 1); a running maximum replaces
    # the per-row recomputation of the prefix maximum.
    exponents = float(k - 1) / (np.arange(n_comparisons) + 1)
    terms = 1 - (1 - sorted_pvals) ** exponents
    apvs = np.minimum(np.maximum.accumulate(terms), 1)
    return pd.DataFrame(data=apvs[:, np.newaxis], index=comparison, columns=["Finner"])




[docs]
def hochberg(p_values, control):
    """
    Hochberg's procedure for the adjusted p-value computation.

    With the first row of ``p_values`` sorted ascending as p(0) <= ... <= p(k-1),
    the i-th adjusted value is ``min(1, max_{j >= i} (k - 1 - j) * p(j))`` for
    i = 0 .. k-2 (the largest entry gets weight 0 and is not reported).

    NOTE(review): this keeps the running *maximum* used by the original
    implementation; some references define Hochberg's step-up adjustment with a
    running minimum -- confirm against the reference this module follows.

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame containing the p-values obtained from a ranking test.
        Only the first row is used.
    control: int or string. Index or Name of the control algorithm.

    Returns:
    --------
    APVs: DataFrame containing the adjusted p-values (single column "Hochberg"),
        indexed by "<control> vs <algorithm>" in ascending p-value order.

    Raises:
    -------
    ValueError: if ``p_values`` is not 2-D, or if ``control`` is None or is a name
        that does not identify exactly one algorithm.
    """

    # Initial Checking
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array(["Alg%d" % alg for alg in range(p_values.shape[1])])

    if isinstance(control, str):
        # Resolve the name explicitly instead of calling int() on an index
        # array, so an unknown/ambiguous name gives a meaningful error.
        matches = np.flatnonzero(algorithms == control)
        if matches.size != 1:
            raise ValueError("Initialization ERROR. Incorrect value for control.")
        control = int(matches[0])
    if control is None:
        raise ValueError("Initialization ERROR. Incorrect value for control.")

    k = p_values.shape[1]
    n_comparisons = max(k - 1, 0)

    # sort p-values p(0) <= p(1) <= ... <= p(k-1)
    order = np.argsort(p_values[0, :])
    sorted_pvals = p_values[0, order].astype(float)

    control_name = str(algorithms[control])
    comparison = [control_name + " vs " + str(algorithms[j]) for j in order[:n_comparisons]]

    # Weight for position j is k - 1 - j. Plain integer arithmetic is used on
    # purpose: a uint8 index vector wraps around once k exceeds 255.
    weighted = sorted_pvals * (k - 1 - np.arange(k))
    # Maximum over positions j >= i, computed as a running maximum from the end.
    suffix_max = np.maximum.accumulate(weighted[::-1])[::-1]
    apvs = np.minimum(suffix_max[:n_comparisons], 1)
    return pd.DataFrame(data=apvs[:, np.newaxis], index=comparison, columns=["Hochberg"])




[docs]
def li(p_values, control):
    """
    Li's procedure for the adjusted p-value computation.

    With the first row of ``p_values`` sorted ascending as p(0) <= ... <= p(k-1),
    the i-th adjusted value is ``min(p(k-2), p(i) / (p(i) + 1 - p(k-2)))`` for
    i = 0 .. k-2. The largest entry p(k-1) is left out.

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame containing the p-values obtained from a ranking test.
        Only the first row is used.
    control: int or string. Index or Name of the control algorithm. Required:
        only control vs all comparisons are supported.

    Returns:
    --------
    APVs: DataFrame containing the adjusted p-values (single column "Li"),
        indexed by "<control> vs <algorithm>" in ascending p-value order.

    Raises:
    -------
    ValueError: if ``p_values`` is not 2-D, or if ``control`` is None or is a name
        that does not identify exactly one algorithm.
    """

    # Initial Checking
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array(["Alg%d" % alg for alg in range(p_values.shape[1])])

    if isinstance(control, str):
        # Resolve the name explicitly instead of calling int() on an index
        # array, so an unknown/ambiguous name gives a meaningful error.
        matches = np.flatnonzero(algorithms == control)
        if matches.size != 1:
            raise ValueError("Initialization ERROR. Incorrect value for control.")
        control = int(matches[0])
    if control is None:
        raise ValueError("Initialization ERROR. Incorrect value for control.")

    k = p_values.shape[1]
    n_comparisons = max(k - 1, 0)

    # sort p-values p(0) <= p(1) <= ... <= p(k-1)
    order = np.argsort(p_values[0, :])
    sorted_pvals = p_values[0, order].astype(float)

    control_name = str(algorithms[control])
    comparison = [control_name + " vs " + str(algorithms[j]) for j in order[:n_comparisons]]

    if n_comparisons == 0:
        # Nothing to compare against; avoid indexing the reference p-value.
        apvs = np.zeros(0)
    else:
        # Reference value: the largest p-value among the reported comparisons.
        p_last = sorted_pvals[-2]
        p = sorted_pvals[:n_comparisons]
        apvs = np.minimum(p_last, p / (p + 1 - p_last))
    return pd.DataFrame(data=apvs[:, np.newaxis], index=comparison, columns=["Li"])




[docs]
def holm(p_values, control=None):
    """
    Holm's procedure for the adjusted p-value computation.

    Control vs all (``control`` given): the first row of ``p_values`` is sorted
    ascending and the i-th adjusted value is
    ``min(1, max_{j <= i} (k - 1 - j) * p(j))`` for i = 0 .. k-2.

    All vs all (``control`` is None): the strict upper triangle of the square
    matrix ``p_values`` gives m = k(k-1)/2 p-values; sorted ascending, the i-th
    adjusted value is ``min(1, max_{j <= i} (m - j) * p(j))``.

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame containing the p-values obtained from a ranking test.
    control: optional int or string. Default None
        Index or Name of the control algorithm. If control is provided, control vs all
        comparisons are considered, else all vs all.

    Returns:
    --------
    APVs: DataFrame containing the adjusted p-values (single column "Holm"),
        indexed by "<algorithm> vs <algorithm>" in ascending p-value order.

    Raises:
    -------
    ValueError: if ``p_values`` is not 2-D, if it is not square in all vs all
        mode, or if ``control`` is neither None, an integer, nor a name that
        identifies exactly one algorithm.
    """

    # Initial Checking
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array(["Alg%d" % alg for alg in range(p_values.shape[1])])

    if isinstance(control, str):
        # Resolve the name explicitly instead of calling int() on an index
        # array, so an unknown/ambiguous name gives a meaningful error.
        matches = np.flatnonzero(algorithms == control)
        if matches.size != 1:
            raise ValueError("Initialization ERROR. Incorrect value for control.")
        control = int(matches[0])

    k = p_values.shape[1]

    if control is None:
        # All vs all: one hypothesis per unordered pair of algorithms.
        if p_values.shape[0] != k:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        m = k * (k - 1) // 2

        # sort p-values p(0) <= p(1) <= ... <= p(m-1)
        rows, cols = np.triu_indices(k, 1)
        pairs_pvals = p_values[rows, cols]
        order = np.argsort(pairs_pvals)

        weighted = pairs_pvals[order].astype(float) * (m - np.arange(m))
        comparison = [
            str(algorithms[row]) + " vs " + str(algorithms[col])
            for row, col in zip(rows[order], cols[order])
        ]
    elif isinstance(control, (int, np.integer)):
        # Control vs all: NumPy integer indices are accepted as well.
        n_comparisons = max(k - 1, 0)

        # sort p-values p(0) <= p(1) <= ... <= p(k-1) and drop the largest one
        order = np.argsort(p_values[0, :])[:n_comparisons]

        weighted = p_values[0, order].astype(float) * (k - 1 - np.arange(n_comparisons))
        control_name = str(algorithms[control])
        comparison = [control_name + " vs " + str(algorithms[j]) for j in order]
    else:
        # Previously this fell through and failed on an unbound local at return.
        raise ValueError("Initialization ERROR. Incorrect value for control.")

    # Step-down: each adjusted value is the running maximum, capped at 1.
    apvs = np.minimum(np.maximum.accumulate(weighted), 1)
    return pd.DataFrame(data=apvs[:, np.newaxis], index=comparison, columns=["Holm"])




[docs]
def shaffer(p_values):
    """
    Shaffer's procedure for adjusted p_value computation.

    The strict upper triangle of the square matrix ``p_values`` gives
    m = k(k-1)/2 p-values. Sorted ascending, the i-th adjusted value is
    ``min(1, max_{j <= i} t(j) * p(j))``, where ``t`` is a non-increasing
    sequence of multipliers built from the set returned by ``S(k)``.

    Parameters:
    -----------
    p_values: square 2-D array-like or DataFrame containing the p-values.

    Returns:
    --------
    APVs: DataFrame containing the adjusted p-values (single column "Shaffer"),
        indexed by "<algorithm> vs <algorithm>" in ascending p-value order.
        Empty when fewer than two algorithms are given.

    Raises:
    -------
    ValueError: if ``p_values`` is not a square 2-D array.
    """

    def S(k):
        """
        Computes the set of possible numbers of true hypotheses.

        Parameters:
        -----------
        k: int
            number of algorithms being compared.

        Returns
        ----------
        TrueSet : list of int
            Possible numbers of true hypotheses, in ascending order.
        """

        # Bottom-up version of S(n) = U_{j=1..n} {C(j, 2) + x : x in S(n - j)},
        # with S(0) = S(1) = {0}. Each S(n) is computed once instead of being
        # recomputed recursively.
        counts = [{0}, {0}]
        for n in range(2, k + 1):
            current = {0}
            for j in range(1, n + 1):
                pairs_in_group = j * (j - 1) // 2
                current.update(pairs_in_group + x for x in counts[n - j])
            counts.append(current)
        # Sort explicitly: callers rely on ascending order, which set iteration
        # does not guarantee.
        return sorted(counts[max(k, 0)])

    # Initial Checking
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        algorithms = None

    # Validate the shape before any column-count access.
    if p_values.ndim != 2:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    elif p_values.shape[0] != p_values.shape[1]:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")

    if algorithms is None:
        algorithms = np.array(["Alg%d" % alg for alg in range(p_values.shape[1])])

    # define parameters
    k = p_values.shape[0]
    m = k * (k - 1) // 2
    if m == 0:
        # Fewer than two algorithms: there is no pairwise hypothesis to adjust.
        return pd.DataFrame(data=np.zeros((0, 1)), index=[], columns=["Shaffer"])

    # Drop the leading 0: s holds the positive possible counts, ascending.
    s = np.array(S(k)[1:])

    # sort p-values p(0) <= p(1) <= ... <= p(m-1)
    rows, cols = np.triu_indices(k, 1)
    pairs_pvals = p_values[rows, cols]
    order = np.argsort(pairs_pvals)

    # Multipliers t: s[-1] first, then every smaller value of s repeated as
    # many times as the gap to the next value of s, in descending order.
    # (len(t) == m because s runs from 1 to m.)
    t = np.concatenate(([s[-1]], np.repeat(s[:-1], np.diff(s))[::-1]))

    # Adjust p-values
    weighted = pairs_pvals[order].astype(float) * t
    comparison = [
        str(algorithms[row]) + " vs " + str(algorithms[col])
        for row, col in zip(rows[order], cols[order])
    ]
    apvs = np.minimum(np.maximum.accumulate(weighted), 1)
    return pd.DataFrame(data=apvs[:, np.newaxis], index=comparison, columns=["Shaffer"])




[docs]
def nemenyi(p_values):
    """
    Nemenyi's procedure for adjusted p_value computation.

    The strict upper triangle of the square matrix ``p_values`` gives
    m = k(k-1)/2 p-values; each one is multiplied by m and capped at 1.

    Parameters:
    -----------
    p_values: square 2-D array-like or DataFrame containing the p-values.

    Returns:
    --------
    APVs: DataFrame containing the adjusted p-values (single column "Nemenyi"),
        indexed by "<algorithm> vs <algorithm>" in ascending p-value order.

    Raises:
    -------
    ValueError: if ``p_values`` is not a square 2-D array.
    """

    # Initial Checking
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        algorithms = None

    # Validate the shape before any column-count access.
    if p_values.ndim != 2:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    elif p_values.shape[0] != p_values.shape[1]:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")

    if algorithms is None:
        algorithms = np.array(["Alg%d" % alg for alg in range(p_values.shape[1])])

    # define parameters
    k = p_values.shape[0]
    m = k * (k - 1) // 2

    # sort p-values p(0) <= p(1) <= ... <= p(m-1)
    rows, cols = np.triu_indices(k, 1)
    pairs_pvals = p_values[rows, cols]
    order = np.argsort(pairs_pvals)

    # Adjust p-values
    comparison = [
        str(algorithms[row]) + " vs " + str(algorithms[col])
        for row, col in zip(rows[order], cols[order])
    ]
    apvs = np.minimum(pairs_pvals[order].astype(float) * m, 1)
    return pd.DataFrame(data=apvs[:, np.newaxis], index=comparison, columns=["Nemenyi"])
# Table Of Contents
#
# Source code for jmetal.lab.statistical_test.apv_procedures