Source code for jmetal.lab.statistical_test.functions

import math

import numpy as np
import pandas as pd
from scipy.stats import binom, chi2, f, norm

from jmetal.lab.statistical_test.apv_procedures import *


def ranks(data: np.array, descending=False):
    """Computes the rank of the elements in data.

    :param data: 2-D matrix
    :param descending: boolean (default False). If true, rank is sorted in descending order.
    :return: ranks, where ranks[i][j] == rank of the i-th row w.r.t the j-th column.
    """
    s = 0 if (descending is False) else 1

    # Compute ranks. (ranks[i][j] == rank of the i-th treatment on the j-th sample.)
    if data.ndim == 2:
        ranks = np.ones(data.shape)
        for i in range(data.shape[0]):
            values, indices, rep = np.unique(
                (-1) ** s * np.sort((-1) ** s * data[i, :]),
                return_index=True,
                return_counts=True,
            )
            for j in range(data.shape[1]):
                ranks[i, j] += indices[values == data[i, j]] + 0.5 * (rep[values == data[i, j]] - 1)
        return ranks
    elif data.ndim == 1:
        ranks = np.ones((data.size,))
        values, indices, rep = np.unique(
            (-1) ** s * np.sort((-1) ** s * data),
            return_index=True,
            return_counts=True,
        )
        for i in range(data.size):
            ranks[i] += indices[values == data[i]] + 0.5 * (rep[values == data[i]] - 1)
        return ranks
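
A small illustrative call (hypothetical values, not from the library's tests) showing how ranks behaves on a 2-D matrix: each row is ranked independently and tied values receive the average of the ranks they span:

    import numpy as np
    from jmetal.lab.statistical_test.functions import ranks

    data = np.array([[0.2, 0.5, 0.1],
                     [0.3, 0.3, 0.9]])
    print(ranks(data))                    # row-wise ascending ranks; the two 0.3 ties share rank 1.5
    print(ranks(data, descending=True))   # largest value in each row gets rank 1
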
def sign_test(data):
    """Given the results drawn from two algorithms/methods X and Y, the sign test analyses
    if there is a difference between X and Y.

    .. note:: Null Hypothesis: Pr(X<Y) = 0.5

    :param data: An (n x 2) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem.
    :return p_value: The associated p-value from the binomial distribution.
    :return bstat: Number of successes.
    """
    if type(data) == pd.DataFrame:
        data = data.values

    if data.shape[1] == 2:
        X, Y = data[:, 0], data[:, 1]
        n_perf = data.shape[0]
    else:
        raise ValueError("Initialization ERROR. Incorrect number of dimensions for axis 1")

    # Compute the differences
    Z = X - Y
    # Compute the number of pairs Z<0
    Wminus = sum(Z < 0)
    # If H_0 is true ---> W follows Binomial(n,0.5)
    p_value_minus = 1 - binom.cdf(k=Wminus, p=0.5, n=n_perf)

    # Compute the number of pairs Z>0
    Wplus = sum(Z > 0)
    # If H_0 is true ---> W follows Binomial(n,0.5)
    p_value_plus = 1 - binom.cdf(k=Wplus, p=0.5, n=n_perf)

    p_value = 2 * min([p_value_minus, p_value_plus])

    return pd.DataFrame(
        data=np.array([Wminus, Wplus, p_value]), index=["Num X<Y", "Num X>Y", "p-value"], columns=["Results"]
    )
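
A minimal usage sketch (the scores below are made up; two columns = two algorithms, one row per problem):

    import numpy as np
    from jmetal.lab.statistical_test.functions import sign_test

    scores = np.array([
        [0.82, 0.80],
        [0.75, 0.78],
        [0.90, 0.85],
        [0.66, 0.71],
        [0.88, 0.87],
    ])
    # Returns a one-column DataFrame with rows "Num X<Y", "Num X>Y" and "p-value".
    print(sign_test(scores))
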
def friedman_test(data):
    """Friedman ranking test.

    .. note:: Null Hypothesis: In a set of k (>=2) treatments (or tested algorithms), all the
        treatments are equivalent, so their average ranks should be equal.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem.
    :return p_value: The associated p-value.
    :return friedman_stat: Friedman's chi-square.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        data = data.values

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    # Compute ranks.
    datarank = ranks(data)
    # Compute for each algorithm the ranking average.
    avranks = np.mean(datarank, axis=0)

    # Get Friedman statistics
    friedman_stat = (12.0 * n_samples) / (k * (k + 1.0)) * (np.sum(avranks**2) - (k * (k + 1) ** 2) / 4.0)

    # Compute p-value
    p_value = 1.0 - chi2.cdf(friedman_stat, df=(k - 1))

    return pd.DataFrame(
        data=np.array([friedman_stat, p_value]), index=["Friedman-statistic", "p-value"], columns=["Results"]
    )
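
A usage sketch with invented results for three algorithms on five problems (any n x k layout works, one column per algorithm):

    import numpy as np
    from jmetal.lab.statistical_test.functions import friedman_test

    results = np.array([
        [0.75, 0.62, 0.69],
        [0.81, 0.79, 0.70],
        [0.66, 0.68, 0.60],
        [0.90, 0.85, 0.82],
        [0.72, 0.71, 0.65],
    ])
    # One-column DataFrame with the Friedman chi-square statistic and its p-value.
    print(friedman_test(results))
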
def friedman_aligned_rank_test(data):
    """Method of aligned ranks for the Friedman test.

    .. note:: Null Hypothesis: In a set of k (>=2) treatments (or tested algorithms), all the
        treatments are equivalent, so their average ranks should be equal.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem.
    :return p_value: The associated p-value.
    :return aligned_rank_stat: Friedman's aligned rank chi-square statistic.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        data = data.values

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    # Compute the average value achieved by all algorithms in each problem
    control = np.mean(data, axis=1)
    # Compute the difference between control and data
    diff = [data[:, j] - control for j in range(data.shape[1])]
    # Rank diff
    alignedRanks = ranks(np.ravel(diff))
    alignedRanks = np.reshape(alignedRanks, newshape=(n_samples, k), order="F")

    # Compute statistic
    Rhat_i = np.sum(alignedRanks, axis=1)
    Rhat_j = np.sum(alignedRanks, axis=0)
    si, sj = np.sum(Rhat_i**2), np.sum(Rhat_j**2)

    A = sj - (k * n_samples**2 / 4.0) * (k * n_samples + 1) ** 2
    B1 = k * n_samples * (k * n_samples + 1) * (2 * k * n_samples + 1) / 6.0
    B2 = si / float(k)

    alignedRanks_stat = ((k - 1) * A) / (B1 - B2)

    p_value = 1 - chi2.cdf(alignedRanks_stat, df=k - 1)

    return pd.DataFrame(
        data=np.array([alignedRanks_stat, p_value]), index=["Aligned Rank stat", "p-value"], columns=["Results"]
    )
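
The aligned-ranks variant is called the same way; a DataFrame input is also accepted (column names matter only for the post-hoc variants further down). Hypothetical data again:

    import pandas as pd
    from jmetal.lab.statistical_test.functions import friedman_aligned_rank_test

    results = pd.DataFrame({
        "AlgA": [0.75, 0.81, 0.66, 0.90, 0.72],
        "AlgB": [0.62, 0.79, 0.68, 0.85, 0.71],
        "AlgC": [0.69, 0.70, 0.60, 0.82, 0.65],
    })
    print(friedman_aligned_rank_test(results))
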
def quade_test(data):
    """Quade test.

    .. note:: Null Hypothesis: In a set of k (>=2) treatments (or tested algorithms), all the
        treatments are equivalent, so their average ranks should be equal.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem.
    :return p_value: The associated p-value from the F-distribution.
    :return fq: Computed F-value.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        data = data.values

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    # Compute ranks.
    datarank = ranks(data)
    # Compute the range of each problem
    problemRange = np.max(data, axis=1) - np.min(data, axis=1)
    # Compute problem rank
    problemRank = ranks(problemRange)

    # Compute S_stat: weight of each observation within the problem, adjusted to reflect
    # the significance of the problem when it appears.
    S_stat = np.zeros((n_samples, k))
    for i in range(n_samples):
        S_stat[i, :] = problemRank[i] * (datarank[i, :] - 0.5 * (k + 1))
    Salg = np.sum(S_stat, axis=0)

    # Compute Fq (Quade Test statistic) and associated p_value
    A = np.sum(S_stat**2)
    B = np.sum(Salg**2) / float(n_samples)
    if A == B:
        Fq = np.inf
        p_value = (1 / (math.factorial(k))) ** (n_samples - 1)
    else:
        Fq = (n_samples - 1.0) * B / (A - B)
        p_value = 1 - f.cdf(Fq, k - 1, (k - 1) * (n_samples - 1))

    return pd.DataFrame(data=np.array([Fq, p_value]), index=["Quade Test statistic", "p-value"], columns=["Results"])
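
The Quade test weighs each problem by its range, so problems where the algorithms differ the most contribute more. A sketch with the same kind of hypothetical matrix:

    import numpy as np
    from jmetal.lab.statistical_test.functions import quade_test

    results = np.array([
        [0.75, 0.62, 0.69],
        [0.81, 0.79, 0.70],
        [0.66, 0.68, 0.60],
        [0.90, 0.85, 0.82],
    ])
    # One-column DataFrame with the Quade F statistic and its p-value.
    print(quade_test(results))
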
def friedman_ph_test(data, control=None, apv_procedure=None):
    """Friedman post-hoc test.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem.
    :param control: optional int or string. Default None. Index or name of the control
        algorithm. If control is None, the test considers all possible pairwise comparisons
        among algorithms.
    :param apv_procedure: optional string. Default None.
        Name of the procedure for computing adjusted p-values. If apv_procedure is None,
        adjusted p-values are not computed; else the values are computed according to the
        specified procedure:
        For 1 vs all comparisons: {'Bonferroni', 'Holm', 'Hochberg', 'Holland', 'Finner', 'Li'}
        For all vs all comparisons: {'Shaffer', 'Holm', 'Nemenyi'}
    :return z_values: Test statistic.
    :return p_values: The p-value according to the normal distribution.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        algorithms = data.columns
        data = data.values
    elif type(data) == np.ndarray:
        algorithms = np.array(["Alg%d" % alg for alg in range(data.shape[1])])

    if control is None:
        index = algorithms
    elif type(control) == int:
        index = [algorithms[control]]
    else:
        index = [control]

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    if control is not None:
        if type(control) == int and control >= data.shape[1]:
            raise ValueError("Initialization ERROR. control is out of bounds")
        if type(control) == str and control not in algorithms:
            raise ValueError("Initialization ERROR. %s is not a column name of data" % control)

    if apv_procedure is not None:
        if apv_procedure not in [
            "Bonferroni",
            "Holm",
            "Hochberg",
            "Holland",
            "Finner",
            "Li",
            "Shaffer",
            "Nemenyi",
        ]:
            raise ValueError("Initialization ERROR. Incorrect value for APVprocedure.")

    # Compute ranks.
    datarank = ranks(data)
    # Compute for each algorithm the ranking average.
    avranks = np.mean(datarank, axis=0)

    # Compute z-values
    aux = np.sqrt((k * (k + 1)) / (6.0 * n_samples))

    if control is None:
        z = np.zeros((k, k))
        for i in range(k):
            for j in range(i + 1, k):
                z[i, j] = abs(avranks[i] - avranks[j]) / aux
        z += z.T
    else:
        if type(control) == str:
            control = int(np.where(algorithms == control)[0])
        z = np.zeros((1, k))
        for j in range(k):
            z[0, j] = abs(avranks[control] - avranks[j]) / aux

    # Compute associated p-value
    p_value = 2 * (1.0 - norm.cdf(z))

    pvalues_df = pd.DataFrame(data=p_value, index=index, columns=algorithms)
    zvalues_df = pd.DataFrame(data=z, index=index, columns=algorithms)

    if apv_procedure is None:
        return zvalues_df, pvalues_df
    else:
        if apv_procedure == "Bonferroni":
            ap_vs_df = bonferroni_dunn(pvalues_df, control=control)
        elif apv_procedure == "Holm":
            ap_vs_df = holm(pvalues_df, control=control)
        elif apv_procedure == "Hochberg":
            ap_vs_df = hochberg(pvalues_df, control=control)
        elif apv_procedure == "Holland":
            ap_vs_df = holland(pvalues_df, control=control)
        elif apv_procedure == "Finner":
            ap_vs_df = finner(pvalues_df, control=control)
        elif apv_procedure == "Li":
            ap_vs_df = li(pvalues_df, control=control)
        elif apv_procedure == "Shaffer":
            ap_vs_df = shaffer(pvalues_df)
        elif apv_procedure == "Nemenyi":
            ap_vs_df = nemenyi(pvalues_df)

        return zvalues_df, pvalues_df, ap_vs_df
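
A sketch of a one-vs-all comparison against a control column with Holm-adjusted p-values (the algorithm names and scores below are invented for illustration):

    import pandas as pd
    from jmetal.lab.statistical_test.functions import friedman_ph_test

    results = pd.DataFrame({
        "NSGAII": [0.75, 0.81, 0.66, 0.90, 0.72],
        "SPEA2":  [0.62, 0.79, 0.68, 0.85, 0.71],
        "MOEAD":  [0.69, 0.70, 0.60, 0.82, 0.65],
    })
    z, p, adjusted = friedman_ph_test(results, control="NSGAII", apv_procedure="Holm")
    print(p)         # raw p-values of the control vs. each algorithm
    print(adjusted)  # Holm-adjusted p-values
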
def friedman_aligned_ph_test(data, control=None, apv_procedure=None):
    """Friedman Aligned Ranks post-hoc test.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem.
    :param control: optional int or string. Default None. Index or name of the control
        algorithm. If control is None, the test considers all possible pairwise comparisons
        among algorithms.
    :param apv_procedure: optional string. Default None.
        Name of the procedure for computing adjusted p-values. If apv_procedure is None,
        adjusted p-values are not computed; else the values are computed according to the
        specified procedure:
        For 1 vs all comparisons: {'Bonferroni', 'Holm', 'Hochberg', 'Holland', 'Finner', 'Li'}
        For all vs all comparisons: {'Shaffer', 'Holm', 'Nemenyi'}
    :return z_values: Test statistic.
    :return p_values: The p-value according to the normal distribution.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        algorithms = data.columns
        data = data.values
    elif type(data) == np.ndarray:
        algorithms = np.array(["Alg%d" % alg for alg in range(data.shape[1])])

    if control is None:
        index = algorithms
    elif type(control) == int:
        index = [algorithms[control]]
    else:
        index = [control]

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    if control is not None:
        if type(control) == int and control >= data.shape[1]:
            raise ValueError("Initialization ERROR. control is out of bounds")
        if type(control) == str and control not in algorithms:
            raise ValueError("Initialization ERROR. %s is not a column name of data" % control)

    # Compute the average value achieved by all algorithms in each problem
    problemmean = np.mean(data, axis=1)
    # Compute the difference between control and data
    diff = np.zeros((n_samples, k))
    for j in range(k):
        diff[:, j] = data[:, j] - problemmean

    alignedRanks = ranks(np.ravel(diff))
    alignedRanks = np.reshape(alignedRanks, newshape=(n_samples, k))

    # Average ranks
    avranks = np.mean(alignedRanks, axis=0)

    # Compute test statistics
    aux = 1.0 / np.sqrt(k * (n_samples + 1) / 6.0)

    if control is None:
        z = np.zeros((k, k))
        for i in range(k):
            for j in range(i + 1, k):
                z[i, j] = abs(avranks[i] - avranks[j]) * aux
        z += z.T
    else:
        if type(control) == str:
            control = int(np.where(algorithms == control)[0])
        z = np.zeros((1, k))
        for j in range(k):
            z[0, j] = abs(avranks[control] - avranks[j]) * aux

    # Compute associated p-value
    p_value = 2 * (1.0 - norm.cdf(z))

    pvalues_df = pd.DataFrame(data=p_value, index=index, columns=algorithms)
    zvalues_df = pd.DataFrame(data=z, index=index, columns=algorithms)

    if apv_procedure is None:
        return zvalues_df, pvalues_df
    else:
        if apv_procedure == "Bonferroni":
            ap_vs_df = bonferroni_dunn(pvalues_df, control=control)
        elif apv_procedure == "Holm":
            ap_vs_df = holm(pvalues_df, control=control)
        elif apv_procedure == "Hochberg":
            ap_vs_df = hochberg(pvalues_df, control=control)
        elif apv_procedure == "Holland":
            ap_vs_df = holland(pvalues_df, control=control)
        elif apv_procedure == "Finner":
            ap_vs_df = finner(pvalues_df, control=control)
        elif apv_procedure == "Li":
            ap_vs_df = li(pvalues_df, control=control)
        elif apv_procedure == "Shaffer":
            ap_vs_df = shaffer(pvalues_df)
        elif apv_procedure == "Nemenyi":
            ap_vs_df = nemenyi(pvalues_df)

        return zvalues_df, pvalues_df, ap_vs_df
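
All-vs-all comparisons work the same way: omit control and pick an all-vs-all adjustment such as 'Shaffer'. Purely illustrative data, as above:

    import pandas as pd
    from jmetal.lab.statistical_test.functions import friedman_aligned_ph_test

    results = pd.DataFrame({
        "NSGAII": [0.75, 0.81, 0.66, 0.90, 0.72],
        "SPEA2":  [0.62, 0.79, 0.68, 0.85, 0.71],
        "MOEAD":  [0.69, 0.70, 0.60, 0.82, 0.65],
    })
    z, p, adjusted = friedman_aligned_ph_test(results, apv_procedure="Shaffer")
    print(p)  # k x k matrix of pairwise p-values
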
def quade_ph_test(data, control=None, apv_procedure=None):
    """Quade post-hoc test.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem.
    :param control: optional int or string. Default None. Index or name of the control
        algorithm. If control is None, the test considers all possible pairwise comparisons
        among algorithms.
    :param apv_procedure: optional string. Default None.
        Name of the procedure for computing adjusted p-values. If apv_procedure is None,
        adjusted p-values are not computed; else the values are computed according to the
        specified procedure:
        For 1 vs all comparisons: {'Bonferroni', 'Holm', 'Hochberg', 'Holland', 'Finner', 'Li'}
        For all vs all comparisons: {'Shaffer', 'Holm', 'Nemenyi'}
    :return z_values: Test statistic.
    :return p_values: The p-value according to the normal distribution.
    """

    # Initial Checking
    if type(data) == pd.DataFrame:
        algorithms = data.columns
        data = data.values
    elif type(data) == np.ndarray:
        algorithms = np.array(["Alg%d" % alg for alg in range(data.shape[1])])

    if control is None:
        index = algorithms
    elif type(control) == int:
        index = [algorithms[control]]
    else:
        index = [control]

    if data.ndim == 2:
        n_samples, k = data.shape
    else:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    if control is not None:
        if type(control) == int and control >= data.shape[1]:
            raise ValueError("Initialization ERROR. control is out of bounds")
        if type(control) == str and control not in algorithms:
            raise ValueError("Initialization ERROR. %s is not a column name of data" % control)

    # Compute ranks.
    datarank = ranks(data)
    # Compute the range of each problem
    problemRange = np.max(data, axis=1) - np.min(data, axis=1)
    # Compute problem rank
    problemRank = ranks(problemRange)

    # Compute average rankings
    W = np.zeros((n_samples, k))
    for i in range(n_samples):
        W[i, :] = problemRank[i] * datarank[i, :]
    avranks = 2 * np.sum(W, axis=0) / (n_samples * (n_samples + 1))

    # Compute test statistics
    aux = 1.0 / np.sqrt(k * (k + 1) * (2 * n_samples + 1) * (k - 1) / (18.0 * n_samples * (n_samples + 1)))

    if control is None:
        z = np.zeros((k, k))
        for i in range(k):
            for j in range(i + 1, k):
                z[i, j] = abs(avranks[i] - avranks[j]) * aux
        z += z.T
    else:
        if type(control) == str:
            control = int(np.where(algorithms == control)[0])
        z = np.zeros((1, k))
        for j in range(k):
            z[0, j] = abs(avranks[control] - avranks[j]) * aux

    # Compute associated p-value
    p_value = 2 * (1.0 - norm.cdf(z))

    pvalues_df = pd.DataFrame(data=p_value, index=index, columns=algorithms)
    zvalues_df = pd.DataFrame(data=z, index=index, columns=algorithms)

    if apv_procedure is None:
        return zvalues_df, pvalues_df
    else:
        if apv_procedure == "Bonferroni":
            ap_vs_df = bonferroni_dunn(pvalues_df, control=control)
        elif apv_procedure == "Holm":
            ap_vs_df = holm(pvalues_df, control=control)
        elif apv_procedure == "Hochberg":
            ap_vs_df = hochberg(pvalues_df, control=control)
        elif apv_procedure == "Holland":
            ap_vs_df = holland(pvalues_df, control=control)
        elif apv_procedure == "Finner":
            ap_vs_df = finner(pvalues_df, control=control)
        elif apv_procedure == "Li":
            ap_vs_df = li(pvalues_df, control=control)
        elif apv_procedure == "Shaffer":
            ap_vs_df = shaffer(pvalues_df)
        elif apv_procedure == "Nemenyi":
            ap_vs_df = nemenyi(pvalues_df)

        return zvalues_df, pvalues_df, ap_vs_df
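
The Quade post-hoc variant shares the same interface; the control can also be given as a column index (hypothetical numeric matrix below):

    import numpy as np
    from jmetal.lab.statistical_test.functions import quade_ph_test

    results = np.array([
        [0.75, 0.62, 0.69],
        [0.81, 0.79, 0.70],
        [0.66, 0.68, 0.60],
        [0.90, 0.85, 0.82],
        [0.72, 0.71, 0.65],
    ])
    # With a plain array the columns are auto-named Alg0, Alg1, ...
    z, p = quade_ph_test(results, control=0)
    print(p)
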