Source code for jmetal.lab.statistical_test.functions

from scipy.stats import binom, chi2, f, norm

from jmetal.lab.statistical_test.apv_procedures import *



[docs]
def ranks(data: np.array, descending=False):
    """Computes the rank of the elements in data.

    :param data: 2-D matrix
    :param descending: boolean (default False). If true, rank is sorted in descending order.
    :return: ranks, where ranks[i][j] == rank of the i-th row w.r.t the j-th column.
    """
    s = 0 if (descending is False) else 1

    # Compute ranks. (ranks[i][j] == rank of the i-th treatment on the j-th sample.)
    if data.ndim == 2:
        ranks = np.ones(data.shape)
        for i in range(data.shape[0]):
            values, indices, rep = np.unique(
                (-1) ** s * np.sort((-1) ** s * data[i, :]),
                return_index=True,
                return_counts=True,
            )
            for j in range(data.shape[1]):
                ranks[i, j] += indices[values == data[i, j]] + 0.5 * (rep[values == data[i, j]] - 1)
        return ranks
    elif data.ndim == 1:
        ranks = np.ones((data.size,))
        values, indices, rep = np.unique(
            (-1) ** s * np.sort((-1) ** s * data),
            return_index=True,
            return_counts=True,
        )
        for i in range(data.size):
            ranks[i] += indices[values == data[i]] + 0.5 * (rep[values == data[i]] - 1)
        return ranks




[docs]
def sign_test(data):
    """Given the results drawn from two algorithms/methods X and Y, the sign test analyses if
    there is a difference between X and Y.

    .. note:: Null Hypothesis: Pr(X<Y)= 0.5

    :param data: An (n x 2) array or DataFrame contaning the results. In data, each column represents an algorithm and, and each row a problem.
    :return p_value: The associated p-value from the binomial distribution.
    :return bstat: Number of successes.
    """

    if type(data) == pd.DataFrame:
        data = data.values

    if data.shape[1] == 2:
        X, Y = data[:, 0], data[:, 1]
        n_perf = data.shape[0]
    else:
        raise ValueError("Initialization ERROR. Incorrect number of dimensions for axis 1")

    # Compute the differences
    Z = X - Y
    # Compute the number of pairs Z<0
    Wminus = sum(Z < 0)
    # If H_0 is true ---> W follows Binomial(n,0.5)
    p_value_minus = 1 - binom.cdf(k=Wminus, p=0.5, n=n_perf)

    # Compute the number of pairs Z>0
    Wplus = sum(Z > 0)
    # If H_0 is true ---> W follows Binomial(n,0.5)
    p_value_plus = 1 - binom.cdf(k=Wplus, p=0.5, n=n_perf)

    p_value = 2 * min([p_value_minus, p_value_plus])

    return pd.DataFrame(
        data=np.array([Wminus, Wplus, p_value]), index=["Num X<Y", "Num X>Y", "p-value"], columns=["Results"]
    )




[docs]
def friedman_test(data):
    """Friedman ranking test.

    ..note:: Null Hypothesis: In a set of k (>=2) treaments (or tested algorithms), all the treatments are equivalent, so their average ranks should be equal.

    :param data: An (n x 2) array or DataFrame contaning the results. In data, each column represents an algorithm and, and each row a problem.
    :return p_value: The associated p-value.
    :return friedman_stat: Friedman's chi-square.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        data = data.values

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    # Compute ranks.
    datarank = ranks(data)

    # Compute for each algorithm the ranking average.
    avranks = np.mean(datarank, axis=0)

    # Get Friedman statistics
    friedman_stat = (12.0 * n_samples) / (k * (k + 1.0)) * (np.sum(avranks**2) - (k * (k + 1) ** 2) / 4.0)

    # Compute p-value
    p_value = 1.0 - chi2.cdf(friedman_stat, df=(k - 1))

    return pd.DataFrame(
        data=np.array([friedman_stat, p_value]), index=["Friedman-statistic", "p-value"], columns=["Results"]
    )




[docs]
def friedman_aligned_rank_test(data):
    """Method of aligned ranks for the Friedman test.

    ..note:: Null Hypothesis: In a set of k (>=2) treaments (or tested algorithms), all the treatments are equivalent, so their average ranks should be equal.

    :param data: An (n x 2) array or DataFrame contaning the results. In data, each column represents an algorithm and, and each row a problem.
    :return p_value: The associated p-value.
    :return aligned_rank_stat: Friedman's aligned rank chi-square statistic.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        data = data.values

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    # Compute the average value achieved by all algorithms in each problem
    control = np.mean(data, axis=1)
    # Compute the difference between control an data
    diff = [data[:, j] - control for j in range(data.shape[1])]
    # rank diff
    alignedRanks = ranks(np.ravel(diff))
    alignedRanks = np.reshape(alignedRanks, newshape=(n_samples, k), order="F")

    # Compute statistic
    Rhat_i = np.sum(alignedRanks, axis=1)
    Rhat_j = np.sum(alignedRanks, axis=0)
    si, sj = np.sum(Rhat_i**2), np.sum(Rhat_j**2)

    A = sj - (k * n_samples**2 / 4.0) * (k * n_samples + 1) ** 2
    B1 = k * n_samples * (k * n_samples + 1) * (2 * k * n_samples + 1) / 6.0
    B2 = si / float(k)

    alignedRanks_stat = ((k - 1) * A) / (B1 - B2)

    p_value = 1 - chi2.cdf(alignedRanks_stat, df=k - 1)

    return pd.DataFrame(
        data=np.array([alignedRanks_stat, p_value]), index=["Aligned Rank stat", "p-value"], columns=["Results"]
    )




[docs]
def quade_test(data):
    """Quade test.

    ..note:: Null Hypothesis: In a set of k (>=2) treaments (or tested algorithms), all the treatments are equivalent, so their average ranks should be equal.

    :param data: An (n x 2) array or DataFrame contaning the results. In data, each column represents an algorithm and, and each row a problem.
    :return p_value: The associated p-value from the F-distribution.
    :return fq: Computed F-value.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        data = data.values

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    # Compute ranks.
    datarank = ranks(data)
    # Compute the range of each problem
    problemRange = np.max(data, axis=1) - np.min(data, axis=1)
    # Compute problem rank
    problemRank = ranks(problemRange)

    # Compute S_stat: weight of each observation within the problem, adjusted to reflect
    # the significance of the problem when it appears.
    S_stat = np.zeros((n_samples, k))
    for i in range(n_samples):
        S_stat[i, :] = problemRank[i] * (datarank[i, :] - 0.5 * (k + 1))
    Salg = np.sum(S_stat, axis=0)

    # Compute Fq (Quade Test statistic) and associated p_value
    A = np.sum(S_stat**2)
    B = np.sum(Salg**2) / float(n_samples)

    if A == B:
        Fq = np.Inf
        p_value = (1 / (np.math.factorial(k))) ** (n_samples - 1)
    else:
        Fq = (n_samples - 1.0) * B / (A - B)
        p_value = 1 - f.cdf(Fq, k - 1, (k - 1) * (n_samples - 1))

    return pd.DataFrame(data=np.array([Fq, p_value]), index=["Quade Test statistic", "p-value"], columns=["Results"])




[docs]
def friedman_ph_test(data, control=None, apv_procedure=None):
    """Friedman post-hoc test.

    :param data: An (n x 2) array or DataFrame contaning the results. In data, each column represents an algorithm and, and each row a problem.
    :param control: optional int or string. Default None. Index or Name of the control algorithm. If control = None all FriedmanPosHocTest considers all possible comparisons among algorithms.
    :param apv_procedure: optional string. Default None.
        Name of the procedure for computing adjusted p-values. If apv_procedure
        is None, adjusted p-value are not computed, else the values are computed
        according to the specified procedure:
        For 1 vs all comparisons.
            {'Bonferroni', 'Holm', 'Hochberg', 'Holland', 'Finner', 'Li'}
        For all vs all coparisons.
            {'Shaffer', 'Holm', 'Nemenyi'}

    :return z_values: Test statistic.
    :return p_values: The p-value according to the Studentized range distribution.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        algorithms = data.columns
        data = data.values
    elif type(data) == np.ndarray:
        algorithms = np.array(["Alg%d" % alg for alg in range(data.shape[1])])

    if control is None:
        index = algorithms
    elif type(control) == int:
        index = [algorithms[control]]
    else:
        index = [control]

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    if control is not None:
        if type(control) == int and control >= data.shape[1]:
            raise ValueError("Initialization ERROR. control is out of bounds")
        if type(control) == str and control not in algorithms:
            raise ValueError("Initialization ERROR. %s is not a column name of data" % control)

    if apv_procedure is not None:
        if apv_procedure not in [
            "Bonferroni",
            "Holm",
            "Hochberg",
            "Hommel",
            "Holland",
            "Finner",
            "Li",
            "Shaffer",
            "Nemenyi",
        ]:
            raise ValueError("Initialization ERROR. Incorrect value for APVprocedure.")

    # Compute ranks.
    datarank = ranks(data)
    # Compute for each algorithm the ranking average.
    avranks = np.mean(datarank, axis=0)

    # Compute z-values
    aux = np.sqrt((k * (k + 1)) / (6.0 * n_samples))

    if control is None:
        z = np.zeros((k, k))
        for i in range(k):
            for j in range(i + 1, k):
                z[i, j] = abs(avranks[i] - avranks[j]) / aux
        z += z.T
    else:
        if type(control) == str:
            control = int(np.where(algorithms == control)[0])
        z = np.zeros((1, k))
        for j in range(k):
            z[0, j] = abs(avranks[control] - avranks[j]) / aux

    # Compute associated p-value
    p_value = 2 * (1.0 - norm.cdf(z))

    pvalues_df = pd.DataFrame(data=p_value, index=index, columns=algorithms)
    zvalues_df = pd.DataFrame(data=z, index=index, columns=algorithms)

    if apv_procedure is None:
        return zvalues_df, pvalues_df
    else:
        if apv_procedure == "Bonferroni":
            ap_vs_df = bonferroni_dunn(pvalues_df, control=control)
        elif apv_procedure == "Holm":
            ap_vs_df = holm(pvalues_df, control=control)
        elif apv_procedure == "Hochberg":
            ap_vs_df = hochberg(pvalues_df, control=control)
        elif apv_procedure == "Holland":
            ap_vs_df = holland(pvalues_df, control=control)
        elif apv_procedure == "Finner":
            ap_vs_df = finner(pvalues_df, control=control)
        elif apv_procedure == "Li":
            ap_vs_df = li(pvalues_df, control=control)
        elif apv_procedure == "Shaffer":
            ap_vs_df = shaffer(pvalues_df)
        elif apv_procedure == "Nemenyi":
            ap_vs_df = nemenyi(pvalues_df)

        return zvalues_df, pvalues_df, ap_vs_df




[docs]
def friedman_aligned_ph_test(data, control=None, apv_procedure=None):
    """Friedman Aligned Ranks post-hoc test.

    :param data: An (n x 2) array or DataFrame contaning the results. In data, each column represents an algorithm and, and each row a problem.
    :param control: optional int or string. Default None. Index or Name of the control algorithm. If control = None all FriedmanPosHocTest considers all possible comparisons among algorithms.
    :param apv_procedure: optional string. Default None.
        Name of the procedure for computing adjusted p-values. If apv_procedure
        is None, adjusted p-value are not computed, else the values are computed
        according to the specified procedure:
        For 1 vs all comparisons.
            {'Bonferroni', 'Holm', 'Hochberg', 'Holland', 'Finner', 'Li'}
        For all vs all coparisons.
            {'Shaffer', 'Holm', 'Nemenyi'}

    :return z_values: Test statistic.
    :return p_values: The p-value according to the Studentized range distribution.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        algorithms = data.columns
        data = data.values
    elif type(data) == np.ndarray:
        algorithms = np.array(["Alg%d" % alg for alg in range(data.shape[1])])

    if control is None:
        index = algorithms
    elif type(control) == int:
        index = [algorithms[control]]
    else:
        index = [control]

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    if control is not None:
        if type(control) == int and control >= data.shape[1]:
            raise ValueError("Initialization ERROR. control is out of bounds")
        if type(control) == str and control not in algorithms:
            raise ValueError("Initialization ERROR. %s is not a column name of data" % control)

    # Compute the average value achieved by all algorithms in each problem
    problemmean = np.mean(data, axis=1)
    # Compute the difference between control an data
    diff = np.zeros((n_samples, k))
    for j in range(k):
        diff[:, j] = data[:, j] - problemmean

    alignedRanks = ranks(np.ravel(diff))
    alignedRanks = np.reshape(alignedRanks, newshape=(n_samples, k))

    # Average ranks
    avranks = np.mean(alignedRanks, axis=0)

    # Compute test statistics
    aux = 1.0 / np.sqrt(k * (n_samples + 1) / 6.0)
    if control is None:
        z = np.zeros((k, k))
        for i in range(k):
            for j in range(i + 1, k):
                z[i, j] = abs(avranks[i] - avranks[j]) * aux
        z += z.T
    else:
        if type(control) == str:
            control = int(np.where(algorithms == control)[0])
        z = np.zeros((1, k))
        for j in range(k):
            z[0, j] = abs(avranks[control] - avranks[j]) * aux

    # Compute associated p-value
    p_value = 2 * (1.0 - norm.cdf(z))

    pvalues_df = pd.DataFrame(data=p_value, index=index, columns=algorithms)
    zvalues_df = pd.DataFrame(data=z, index=index, columns=algorithms)

    if apv_procedure is None:
        return zvalues_df, pvalues_df
    else:
        if apv_procedure == "Bonferroni":
            ap_vs_df = bonferroni_dunn(pvalues_df, control=control)
        elif apv_procedure == "Holm":
            ap_vs_df = holm(pvalues_df, control=control)
        elif apv_procedure == "Hochberg":
            ap_vs_df = hochberg(pvalues_df, control=control)
        elif apv_procedure == "Holland":
            ap_vs_df = holland(pvalues_df, control=control)
        elif apv_procedure == "Finner":
            ap_vs_df = finner(pvalues_df, control=control)
        elif apv_procedure == "Li":
            ap_vs_df = li(pvalues_df, control=control)
        elif apv_procedure == "Shaffer":
            ap_vs_df = shaffer(pvalues_df)
        elif apv_procedure == "Nemenyi":
            ap_vs_df = nemenyi(pvalues_df)

        return zvalues_df, pvalues_df, ap_vs_df




[docs]
def quade_ph_test(data, control=None, apv_procedure=None):
    """Quade post-hoc test.

    :param data: An (n x 2) array or DataFrame contaning the results. In data, each column represents an algorithm and, and each row a problem.
    :param control: optional int or string. Default None. Index or Name of the control algorithm. If control = None all FriedmanPosHocTest considers all possible comparisons among algorithms.
    :param apv_procedure: optional string. Default None.
        Name of the procedure for computing adjusted p-values. If apv_procedure
        is None, adjusted p-value are not computed, else the values are computed
        according to the specified procedure:
        For 1 vs all comparisons.
            {'Bonferroni', 'Holm', 'Hochberg', 'Holland', 'Finner', 'Li'}
        For all vs all coparisons.
            {'Shaffer', 'Holm', 'Nemenyi'}

    :return z_values: Test statistic.
    :return p_values: The p-value according to the Studentized range distribution.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        algorithms = data.columns
        data = data.values
    elif type(data) == np.ndarray:
        algorithms = np.array(["Alg%d" % alg for alg in range(data.shape[1])])

    if control is None:
        index = algorithms
    elif type(control) == int:
        index = [algorithms[control]]
    else:
        index = [control]

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    if control is not None:
        if type(control) == int and control >= data.shape[1]:
            raise ValueError("Initialization ERROR. control is out of bounds")
        if type(control) == str and control not in algorithms:
            raise ValueError("Initialization ERROR. %s is not a column name of data" % control)

    # Compute ranks.
    datarank = ranks(data)
    # Compute the range of each problem
    problemRange = np.max(data, axis=1) - np.min(data, axis=1)
    # Compute problem rank
    problemRank = ranks(problemRange)

    # Compute average rakings
    W = np.zeros((n_samples, k))
    for i in range(n_samples):
        W[i, :] = problemRank[i] * datarank[i, :]
    avranks = 2 * np.sum(W, axis=0) / (n_samples * (n_samples + 1))
    # Compute test statistics
    aux = 1.0 / np.sqrt(k * (k + 1) * (2 * n_samples + 1) * (k - 1) / (18.0 * n_samples * (n_samples + 1)))
    if control is None:
        z = np.zeros((k, k))
        for i in range(k):
            for j in range(i + 1, k):
                z[i, j] = abs(avranks[i] - avranks[j]) * aux
        z += z.T
    else:
        if type(control) == str:
            control = int(np.where(algorithms == control)[0])
        z = np.zeros((1, k))
        for j in range(k):
            z[0, j] = abs(avranks[control] - avranks[j]) * aux

    # Compute associated p-value
    p_value = 2 * (1.0 - norm.cdf(z))

    pvalues_df = pd.DataFrame(data=p_value, index=index, columns=algorithms)
    zvalues_df = pd.DataFrame(data=z, index=index, columns=algorithms)

    if apv_procedure is None:
        return zvalues_df, pvalues_df
    else:
        if apv_procedure == "Bonferroni":
            ap_vs_df = bonferroni_dunn(pvalues_df, control=control)
        elif apv_procedure == "Holm":
            ap_vs_df = holm(pvalues_df, control=control)
        elif apv_procedure == "Hochberg":
            ap_vs_df = hochberg(pvalues_df, control=control)
        elif apv_procedure == "Holland":
            ap_vs_df = holland(pvalues_df, control=control)
        elif apv_procedure == "Finner":
            ap_vs_df = finner(pvalues_df, control=control)
        elif apv_procedure == "Li":
            ap_vs_df = li(pvalues_df, control=control)
        elif apv_procedure == "Shaffer":
            ap_vs_df = shaffer(pvalues_df)
        elif apv_procedure == "Nemenyi":
            ap_vs_df = nemenyi(pvalues_df)

        return zvalues_df, pvalues_df, ap_vs_df
Table Of Contents

Source code for jmetal.lab.statistical_test.functions