from scipy.stats import binom, chi2, f, norm
from jmetal.lab.statistical_test.apv_procedures import *
[docs]
def ranks(data: np.ndarray, descending=False):
    """Computes the 1-based rank of the elements in data, averaging ranks over ties.

    Tied values all receive the mean of the rank positions they occupy, e.g. two values
    tied for positions 2 and 3 both get rank 2.5.

    :param data: 1-D vector or 2-D matrix (anything ``np.asarray`` accepts). A 2-D input is
        ranked row by row, each row independently of the others.
    :param descending: boolean (default False). If true, the largest value gets rank 1;
        otherwise the smallest value gets rank 1.
    :return: float array with the same shape as data. For 2-D input, ranks[i][j] is the rank
        of data[i][j] among the elements of row i.
    :raises ValueError: if data is neither 1-D nor 2-D.
    """
    data = np.asarray(data)
    if data.ndim == 1:
        return _rank_vector(data, descending)
    if data.ndim == 2:
        ranked = np.ones(data.shape)
        for row in range(data.shape[0]):
            ranked[row, :] = _rank_vector(data[row, :], descending)
        return ranked
    raise ValueError("Initialization ERROR. Incorrect number of array dimensions")


def _rank_vector(vector, descending):
    """Return the 1-based average ranks of the elements of a 1-D array."""
    # inverse[i] is the position of vector[i] among the sorted distinct values;
    # counts[u] is how many times the u-th distinct value occurs.
    _, inverse, counts = np.unique(vector, return_inverse=True, return_counts=True)
    cumulative = np.cumsum(counts)
    if descending:
        # Number of elements strictly greater than each distinct value.
        ahead = vector.size - cumulative
    else:
        # Number of elements strictly smaller than each distinct value.
        ahead = cumulative - counts
    # A group of c tied values occupies positions ahead+1 .. ahead+c, whose mean is
    # ahead + 1 + (c - 1) / 2.
    return 1.0 + ahead[inverse] + 0.5 * (counts[inverse] - 1)
[docs]
def sign_test(data):
    """Given the results drawn from two algorithms/methods X and Y, the sign test analyses if
    there is a difference between X and Y.

    .. note:: Null Hypothesis: Pr(X<Y)= 0.5

    The two-sided p-value is ``2 * min(P(B >= Wminus), P(B >= Wplus))`` with
    ``B ~ Binomial(n, 0.5)``, capped at 1. Here n is the number of rows of data: rows where
    X == Y count towards n but towards neither Wminus nor Wplus.

    :param data: An (n x 2) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem.
    :return: A DataFrame with a single column "Results" and rows "Num X<Y" (number of
        negative differences X - Y), "Num X>Y" (number of positive differences) and "p-value".
    :raises ValueError: if data is not two-dimensional with exactly two columns.
    """
    if isinstance(data, pd.DataFrame):
        data = data.values
    data = np.asarray(data)
    if data.ndim != 2 or data.shape[1] != 2:
        raise ValueError("Initialization ERROR. Incorrect number of dimensions for axis 1")
    n_perf = data.shape[0]

    # Paired differences X - Y
    differences = data[:, 0] - data[:, 1]
    w_minus = int(np.count_nonzero(differences < 0))
    w_plus = int(np.count_nonzero(differences > 0))

    # If H_0 is true ---> W follows Binomial(n, 0.5).
    # sf(w - 1) = P(B >= w): the observed count itself belongs to the tail. Using
    # 1 - cdf(w) would drop it and yield p == 0 whenever one method wins every row.
    p_value_minus = binom.sf(w_minus - 1, n_perf, 0.5)
    p_value_plus = binom.sf(w_plus - 1, n_perf, 0.5)

    # Doubling the smaller tail can exceed 1 (e.g. when most rows are ties), so cap it.
    p_value = min(1.0, 2 * min(p_value_minus, p_value_plus))

    return pd.DataFrame(
        data=np.array([w_minus, w_plus, p_value]), index=["Num X<Y", "Num X>Y", "p-value"], columns=["Results"]
    )
[docs]
def friedman_test(data):
    """Friedman ranking test.

    ..note:: Null Hypothesis: In a set of k (>=2) treatments (or tested algorithms), all the
        treatments are equivalent, so their average ranks should be equal.

    Each row (problem) is ranked independently with ``ranks`` (ascending, ties averaged), the
    ranks are averaged per column, and the statistic
    ``12 n / (k (k + 1)) * (sum_j R_j^2 - k (k + 1)^2 / 4)`` is compared against a chi-square
    distribution with k - 1 degrees of freedom.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem.
    :return: A DataFrame with a single column "Results" and rows "Friedman-statistic"
        (Friedman's chi-square) and "p-value".
    :raises ValueError: if data is not two-dimensional or has fewer than two columns.
    """
    # Initial Checking
    if isinstance(data, pd.DataFrame):
        data = data.values
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions")
    n_samples, k = data.shape
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    # Rank the algorithms within each problem, then average per algorithm.
    avranks = np.mean(ranks(data), axis=0)

    # Get Friedman statistics
    friedman_stat = (12.0 * n_samples) / (k * (k + 1.0)) * (np.sum(avranks**2) - (k * (k + 1) ** 2) / 4.0)

    # Upper-tail probability; sf() keeps precision where 1 - cdf() would round to 0.
    p_value = chi2.sf(friedman_stat, df=(k - 1))

    return pd.DataFrame(
        data=np.array([friedman_stat, p_value]), index=["Friedman-statistic", "p-value"], columns=["Results"]
    )
[docs]
def friedman_aligned_rank_test(data):
    """Method of aligned ranks for the Friedman test.

    ..note:: Null Hypothesis: In a set of k (>=2) treatments (or tested algorithms), all the
        treatments are equivalent, so their average ranks should be equal.

    Every observation is aligned by subtracting the mean of its row (problem); the n * k
    aligned observations are then ranked jointly with ``ranks`` and the statistic is compared
    against a chi-square distribution with k - 1 degrees of freedom.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem.
    :return: A DataFrame with a single column "Results" and rows "Aligned Rank stat"
        (Friedman's aligned rank chi-square statistic) and "p-value".
    :raises ValueError: if data is not two-dimensional or has fewer than two columns.
    """
    # Initial Checking
    if isinstance(data, pd.DataFrame):
        data = data.values
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions")
    n_samples, k = data.shape
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    # Subtract from every observation the average achieved by all algorithms on its problem.
    aligned = data - np.mean(data, axis=1, keepdims=True)

    # Rank all aligned observations together and restore the (problem, algorithm) layout.
    # The shape is passed positionally: the `newshape` keyword is deprecated in recent NumPy.
    aligned_ranks = np.reshape(ranks(np.ravel(aligned)), (n_samples, k))

    # Compute statistic
    rhat_i = np.sum(aligned_ranks, axis=1)  # rank total of each problem
    rhat_j = np.sum(aligned_ranks, axis=0)  # rank total of each algorithm
    si, sj = np.sum(rhat_i**2), np.sum(rhat_j**2)

    A = sj - (k * n_samples**2 / 4.0) * (k * n_samples + 1) ** 2
    B1 = k * n_samples * (k * n_samples + 1) * (2 * k * n_samples + 1) / 6.0
    B2 = si / float(k)

    aligned_ranks_stat = ((k - 1) * A) / (B1 - B2)

    # Upper-tail probability; sf() keeps precision where 1 - cdf() would round to 0.
    p_value = chi2.sf(aligned_ranks_stat, df=k - 1)

    return pd.DataFrame(
        data=np.array([aligned_ranks_stat, p_value]), index=["Aligned Rank stat", "p-value"], columns=["Results"]
    )
[docs]
def quade_test(data):
    """Quade test.

    ..note:: Null Hypothesis: In a set of k (>=2) treatments (or tested algorithms), all the
        treatments are equivalent, so their average ranks should be equal.

    Algorithms are ranked within each problem and problems are ranked by their range
    (max - min over the algorithms), both with ``ranks``. Each within-problem rank, centred on
    (k + 1) / 2, is weighted by the rank of its problem before the F statistic is formed.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem.
    :return: A DataFrame with a single column "Results" and rows "Quade Test statistic"
        (the computed F-value, ``inf`` in the degenerate case A == B) and "p-value".
    :raises ValueError: if data is not two-dimensional or has fewer than two columns.
    """
    # np.math was removed from NumPy; the standard library provides the same function.
    from math import factorial

    # Initial Checking
    if isinstance(data, pd.DataFrame):
        data = data.values
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions")
    n_samples, k = data.shape
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")

    # Rank of every algorithm within each problem.
    datarank = ranks(data)
    # Range of each problem and the rank of that range among all problems.
    problem_range = np.max(data, axis=1) - np.min(data, axis=1)
    problem_rank = ranks(problem_range)

    # S_stat: weight of each observation within the problem, adjusted to reflect
    # the significance of the problem when it appears.
    S_stat = problem_rank[:, np.newaxis] * (datarank - 0.5 * (k + 1))
    Salg = np.sum(S_stat, axis=0)

    # Compute Fq (Quade Test statistic) and associated p_value
    A = np.sum(S_stat**2)
    B = np.sum(Salg**2) / float(n_samples)

    if A == B:
        # Degenerate case: the F ratio would divide by zero.
        Fq = np.inf
        p_value = (1.0 / factorial(k)) ** (n_samples - 1)
    else:
        Fq = (n_samples - 1.0) * B / (A - B)
        # Upper-tail probability; sf() keeps precision where 1 - cdf() would round to 0.
        p_value = f.sf(Fq, k - 1, (k - 1) * (n_samples - 1))

    return pd.DataFrame(data=np.array([Fq, p_value]), index=["Quade Test statistic", "p-value"], columns=["Results"])
[docs]
def friedman_ph_test(data, control=None, apv_procedure=None):
    """Friedman post-hoc test.

    Algorithms are ranked within each problem with ``ranks``; the z statistic of a pair is the
    absolute difference of their average ranks divided by ``sqrt(k (k + 1) / (6 n))``, and the
    unadjusted p-value is the two-sided tail probability of the standard normal distribution.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem. For non-DataFrame input the algorithms
        are named "Alg0", "Alg1", ...
    :param control: optional int or string. Default None. Index or Name of the control
        algorithm. If control = None the test considers all possible comparisons among
        algorithms.
    :param apv_procedure: optional string. Default None.
        Name of the procedure for computing adjusted p-values. If apv_procedure
        is None, adjusted p-value are not computed, else the values are computed
        according to the specified procedure:
        For 1 vs all comparisons.
            {'Bonferroni', 'Holm', 'Hochberg', 'Holland', 'Finner', 'Li'}
        For all vs all comparisons.
            {'Shaffer', 'Holm', 'Nemenyi'}
    :return: ``(z_values, p_values)`` if apv_procedure is None, otherwise
        ``(z_values, p_values, adjusted_p_values)``. z_values and p_values are DataFrames whose
        columns are the algorithms; they have one row per algorithm (all vs all) or a single
        row labelled with the control algorithm (1 vs all). adjusted_p_values is whatever the
        selected procedure returns.
    :raises ValueError: if data is not two-dimensional, has fewer than two columns, control
        does not identify a column, or apv_procedure is not one of the names listed above.
    """
    # Initial Checking
    if isinstance(data, pd.DataFrame):
        algorithms = data.columns
        data = data.values
    else:
        data = np.asarray(data)
        algorithms = None

    # Validate the shape before anything indexes into it.
    if data.ndim != 2:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    n_samples, k = data.shape
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")
    if algorithms is None:
        algorithms = np.array(["Alg%d" % alg for alg in range(k)])

    # Resolve the control algorithm: `index` holds the row labels of the result frames and
    # `control` ends up as a positional index (or None).
    if control is None:
        index = algorithms
    elif isinstance(control, (int, np.integer)):
        if control >= k or control < -k:
            raise ValueError("Initialization ERROR. control is out of bounds")
        index = [algorithms[control]]
    else:
        names = list(algorithms)
        if control not in names:
            raise ValueError("Initialization ERROR. %s is not a column name of data" % (control,))
        index = [control]
        control = names.index(control)

    # Only names that have a dispatch branch below are accepted.
    if apv_procedure is not None and apv_procedure not in (
        "Bonferroni",
        "Holm",
        "Hochberg",
        "Holland",
        "Finner",
        "Li",
        "Shaffer",
        "Nemenyi",
    ):
        raise ValueError("Initialization ERROR. Incorrect value for APVprocedure.")

    # Rank the algorithms within each problem, then average per algorithm.
    avranks = np.mean(ranks(data), axis=0)

    # Compute z-values
    aux = np.sqrt((k * (k + 1)) / (6.0 * n_samples))
    if control is None:
        # Symmetric k x k matrix of pairwise statistics with a zero diagonal.
        z = np.abs(avranks[:, np.newaxis] - avranks[np.newaxis, :]) / aux
    else:
        z = (np.abs(avranks[control] - avranks) / aux).reshape(1, k)

    # Two-sided normal p-value; sf() keeps precision where 1 - cdf() would round to 0.
    p_value = 2 * norm.sf(z)

    pvalues_df = pd.DataFrame(data=p_value, index=index, columns=algorithms)
    zvalues_df = pd.DataFrame(data=z, index=index, columns=algorithms)

    if apv_procedure is None:
        return zvalues_df, pvalues_df

    # Adjusted p-values are delegated to the procedure of the requested name.
    if apv_procedure == "Bonferroni":
        ap_vs_df = bonferroni_dunn(pvalues_df, control=control)
    elif apv_procedure == "Holm":
        ap_vs_df = holm(pvalues_df, control=control)
    elif apv_procedure == "Hochberg":
        ap_vs_df = hochberg(pvalues_df, control=control)
    elif apv_procedure == "Holland":
        ap_vs_df = holland(pvalues_df, control=control)
    elif apv_procedure == "Finner":
        ap_vs_df = finner(pvalues_df, control=control)
    elif apv_procedure == "Li":
        ap_vs_df = li(pvalues_df, control=control)
    elif apv_procedure == "Shaffer":
        ap_vs_df = shaffer(pvalues_df)
    else:  # "Nemenyi"
        ap_vs_df = nemenyi(pvalues_df)
    return zvalues_df, pvalues_df, ap_vs_df
[docs]
def friedman_aligned_ph_test(data, control=None, apv_procedure=None):
    """Friedman Aligned Ranks post-hoc test.

    Every observation is aligned by subtracting the mean of its row (problem); the n * k
    aligned observations are ranked jointly with ``ranks`` and averaged per algorithm. The z
    statistic of a pair is the absolute difference of their average aligned ranks divided by
    ``sqrt(k (n + 1) / 6)``, and the unadjusted p-value is the two-sided tail probability of
    the standard normal distribution.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem. For non-DataFrame input the algorithms
        are named "Alg0", "Alg1", ...
    :param control: optional int or string. Default None. Index or Name of the control
        algorithm. If control = None the test considers all possible comparisons among
        algorithms.
    :param apv_procedure: optional string. Default None.
        Name of the procedure for computing adjusted p-values. If apv_procedure
        is None, adjusted p-value are not computed, else the values are computed
        according to the specified procedure:
        For 1 vs all comparisons.
            {'Bonferroni', 'Holm', 'Hochberg', 'Holland', 'Finner', 'Li'}
        For all vs all comparisons.
            {'Shaffer', 'Holm', 'Nemenyi'}
    :return: ``(z_values, p_values)`` if apv_procedure is None, otherwise
        ``(z_values, p_values, adjusted_p_values)``. z_values and p_values are DataFrames whose
        columns are the algorithms; they have one row per algorithm (all vs all) or a single
        row labelled with the control algorithm (1 vs all). adjusted_p_values is whatever the
        selected procedure returns.
    :raises ValueError: if data is not two-dimensional, has fewer than two columns, control
        does not identify a column, or apv_procedure is not one of the names listed above.
    """
    # Initial Checking
    if isinstance(data, pd.DataFrame):
        algorithms = data.columns
        data = data.values
    else:
        data = np.asarray(data)
        algorithms = None

    # Validate the shape before anything indexes into it.
    if data.ndim != 2:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    n_samples, k = data.shape
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")
    if algorithms is None:
        algorithms = np.array(["Alg%d" % alg for alg in range(k)])

    # Resolve the control algorithm: `index` holds the row labels of the result frames and
    # `control` ends up as a positional index (or None).
    if control is None:
        index = algorithms
    elif isinstance(control, (int, np.integer)):
        if control >= k or control < -k:
            raise ValueError("Initialization ERROR. control is out of bounds")
        index = [algorithms[control]]
    else:
        names = list(algorithms)
        if control not in names:
            raise ValueError("Initialization ERROR. %s is not a column name of data" % (control,))
        index = [control]
        control = names.index(control)

    # Reject unknown procedure names up front instead of failing after all the work is done.
    if apv_procedure is not None and apv_procedure not in (
        "Bonferroni",
        "Holm",
        "Hochberg",
        "Holland",
        "Finner",
        "Li",
        "Shaffer",
        "Nemenyi",
    ):
        raise ValueError("Initialization ERROR. Incorrect value for APVprocedure.")

    # Subtract from every observation the average achieved by all algorithms on its problem.
    aligned = data - np.mean(data, axis=1, keepdims=True)

    # Rank all aligned observations together and restore the (problem, algorithm) layout.
    # The shape is passed positionally: the `newshape` keyword is deprecated in recent NumPy.
    aligned_ranks = np.reshape(ranks(np.ravel(aligned)), (n_samples, k))

    # Average ranks
    avranks = np.mean(aligned_ranks, axis=0)

    # Compute test statistics
    aux = 1.0 / np.sqrt(k * (n_samples + 1) / 6.0)
    if control is None:
        # Symmetric k x k matrix of pairwise statistics with a zero diagonal.
        z = np.abs(avranks[:, np.newaxis] - avranks[np.newaxis, :]) * aux
    else:
        z = (np.abs(avranks[control] - avranks) * aux).reshape(1, k)

    # Two-sided normal p-value; sf() keeps precision where 1 - cdf() would round to 0.
    p_value = 2 * norm.sf(z)

    pvalues_df = pd.DataFrame(data=p_value, index=index, columns=algorithms)
    zvalues_df = pd.DataFrame(data=z, index=index, columns=algorithms)

    if apv_procedure is None:
        return zvalues_df, pvalues_df

    # Adjusted p-values are delegated to the procedure of the requested name.
    if apv_procedure == "Bonferroni":
        ap_vs_df = bonferroni_dunn(pvalues_df, control=control)
    elif apv_procedure == "Holm":
        ap_vs_df = holm(pvalues_df, control=control)
    elif apv_procedure == "Hochberg":
        ap_vs_df = hochberg(pvalues_df, control=control)
    elif apv_procedure == "Holland":
        ap_vs_df = holland(pvalues_df, control=control)
    elif apv_procedure == "Finner":
        ap_vs_df = finner(pvalues_df, control=control)
    elif apv_procedure == "Li":
        ap_vs_df = li(pvalues_df, control=control)
    elif apv_procedure == "Shaffer":
        ap_vs_df = shaffer(pvalues_df)
    else:  # "Nemenyi"
        ap_vs_df = nemenyi(pvalues_df)
    return zvalues_df, pvalues_df, ap_vs_df
[docs]
def quade_ph_test(data, control=None, apv_procedure=None):
    """Quade post-hoc test.

    Algorithms are ranked within each problem and problems are ranked by their range
    (max - min over the algorithms), both with ``ranks``. The average ranking of an algorithm
    is the sum of its within-problem ranks weighted by the problem ranks, divided by
    ``n (n + 1) / 2``. The z statistic of a pair is the absolute difference of their average
    rankings divided by ``sqrt(k (k + 1) (2 n + 1) (k - 1) / (18 n (n + 1)))``, and the
    unadjusted p-value is the two-sided tail probability of the standard normal distribution.

    :param data: An (n x k) array or DataFrame containing the results. In data, each column
        represents an algorithm and each row a problem. For non-DataFrame input the algorithms
        are named "Alg0", "Alg1", ...
    :param control: optional int or string. Default None. Index or Name of the control
        algorithm. If control = None the test considers all possible comparisons among
        algorithms.
    :param apv_procedure: optional string. Default None.
        Name of the procedure for computing adjusted p-values. If apv_procedure
        is None, adjusted p-value are not computed, else the values are computed
        according to the specified procedure:
        For 1 vs all comparisons.
            {'Bonferroni', 'Holm', 'Hochberg', 'Holland', 'Finner', 'Li'}
        For all vs all comparisons.
            {'Shaffer', 'Holm', 'Nemenyi'}
    :return: ``(z_values, p_values)`` if apv_procedure is None, otherwise
        ``(z_values, p_values, adjusted_p_values)``. z_values and p_values are DataFrames whose
        columns are the algorithms; they have one row per algorithm (all vs all) or a single
        row labelled with the control algorithm (1 vs all). adjusted_p_values is whatever the
        selected procedure returns.
    :raises ValueError: if data is not two-dimensional, has fewer than two columns, control
        does not identify a column, or apv_procedure is not one of the names listed above.
    """
    # Initial Checking
    if isinstance(data, pd.DataFrame):
        algorithms = data.columns
        data = data.values
    else:
        data = np.asarray(data)
        algorithms = None

    # Validate the shape before anything indexes into it.
    if data.ndim != 2:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    n_samples, k = data.shape
    if k < 2:
        raise ValueError("Initialization Error. Incorrect number of dimensions for axis 1.")
    if algorithms is None:
        algorithms = np.array(["Alg%d" % alg for alg in range(k)])

    # Resolve the control algorithm: `index` holds the row labels of the result frames and
    # `control` ends up as a positional index (or None).
    if control is None:
        index = algorithms
    elif isinstance(control, (int, np.integer)):
        if control >= k or control < -k:
            raise ValueError("Initialization ERROR. control is out of bounds")
        index = [algorithms[control]]
    else:
        names = list(algorithms)
        if control not in names:
            raise ValueError("Initialization ERROR. %s is not a column name of data" % (control,))
        index = [control]
        control = names.index(control)

    # Reject unknown procedure names up front instead of failing after all the work is done.
    if apv_procedure is not None and apv_procedure not in (
        "Bonferroni",
        "Holm",
        "Hochberg",
        "Holland",
        "Finner",
        "Li",
        "Shaffer",
        "Nemenyi",
    ):
        raise ValueError("Initialization ERROR. Incorrect value for APVprocedure.")

    # Rank of every algorithm within each problem.
    datarank = ranks(data)
    # Range of each problem and the rank of that range among all problems.
    problem_range = np.max(data, axis=1) - np.min(data, axis=1)
    problem_rank = ranks(problem_range)

    # Average rankings: within-problem ranks weighted by the rank of their problem.
    W = problem_rank[:, np.newaxis] * datarank
    avranks = 2 * np.sum(W, axis=0) / (n_samples * (n_samples + 1))

    # Compute test statistics
    aux = 1.0 / np.sqrt(k * (k + 1) * (2 * n_samples + 1) * (k - 1) / (18.0 * n_samples * (n_samples + 1)))
    if control is None:
        # Symmetric k x k matrix of pairwise statistics with a zero diagonal.
        z = np.abs(avranks[:, np.newaxis] - avranks[np.newaxis, :]) * aux
    else:
        z = (np.abs(avranks[control] - avranks) * aux).reshape(1, k)

    # Two-sided normal p-value; sf() keeps precision where 1 - cdf() would round to 0.
    p_value = 2 * norm.sf(z)

    pvalues_df = pd.DataFrame(data=p_value, index=index, columns=algorithms)
    zvalues_df = pd.DataFrame(data=z, index=index, columns=algorithms)

    if apv_procedure is None:
        return zvalues_df, pvalues_df

    # Adjusted p-values are delegated to the procedure of the requested name.
    if apv_procedure == "Bonferroni":
        ap_vs_df = bonferroni_dunn(pvalues_df, control=control)
    elif apv_procedure == "Holm":
        ap_vs_df = holm(pvalues_df, control=control)
    elif apv_procedure == "Hochberg":
        ap_vs_df = hochberg(pvalues_df, control=control)
    elif apv_procedure == "Holland":
        ap_vs_df = holland(pvalues_df, control=control)
    elif apv_procedure == "Finner":
        ap_vs_df = finner(pvalues_df, control=control)
    elif apv_procedure == "Li":
        ap_vs_df = li(pvalues_df, control=control)
    elif apv_procedure == "Shaffer":
        ap_vs_df = shaffer(pvalues_df)
    else:  # "Nemenyi"
        ap_vs_df = nemenyi(pvalues_df)
    return zvalues_df, pvalues_df, ap_vs_df