Source code for jmetal.lab.statistical_test.apv_procedures

import numpy as np
import pandas as pd


def bonferroni_dunn(p_values, control):
    """
    Bonferroni-Dunn's procedure for the adjusted p-value computation.

    Only the first row of ``p_values`` is read. Its entries are sorted in
    ascending order and the ``k - 1`` smallest ones are adjusted as
    ``min((k - 1) * p, 1)``, where ``k`` is the number of columns. The
    largest entry is left out (presumably the control's own p-value --
    confirm against the caller that builds ``p_values``).

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame
        P-values obtained from a ranking test, one column per algorithm.
    control: int or string
        Index or name of the control algorithm.

    Returns:
    --------
    APVs: DataFrame
        Single column "Bonferroni" with the adjusted p-values, indexed by
        "<control> vs <algorithm>" labels in ascending p-value order.

    Raises:
    -------
    ValueError
        If ``control`` is None, if it is a name that is not among the
        algorithms, or if ``p_values`` is not two-dimensional.
    """
    # Initial checking. isinstance (not an exact type match) so that
    # subclasses and plain nested lists are accepted as well.
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array([f"Alg{alg}" for alg in range(p_values.shape[1])])

    if control is None:
        raise ValueError("Initialization ERROR. Incorrect value for control.")
    if isinstance(control, str):
        # Resolve the name to a column position; an unknown name is reported
        # explicitly instead of failing inside an array-to-int conversion.
        try:
            control = list(algorithms).index(control)
        except ValueError:
            raise ValueError("Initialization ERROR. Incorrect value for control.") from None

    k = p_values.shape[1]
    # Sort p-values p(0) <= p(1) <= ... and keep the k-1 smallest.
    order = np.argsort(p_values[0, :])[: k - 1]
    sorted_p = np.asarray(p_values[0, order], dtype=float)

    control_name = algorithms[control]
    comparison = [f"{control_name} vs {algorithms[j]}" for j in order]
    adjusted = np.minimum((k - 1) * sorted_p, 1)
    return pd.DataFrame(data=adjusted.reshape(-1, 1), index=comparison, columns=["Bonferroni"])
def holland(p_values, control):
    """
    Holland's procedure for the adjusted p-value computation.

    Only the first row of ``p_values`` is read. With the entries sorted in
    ascending order, the i-th adjusted value is the running maximum of
    ``1 - (1 - p(j)) ** (k - 1 - j)`` for ``j <= i``, capped at 1, where
    ``k`` is the number of columns. The largest entry is left out
    (presumably the control's own p-value -- confirm against the caller).

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame
        P-values obtained from a ranking test, one column per algorithm.
    control: int or string
        Index or name of the control algorithm.

    Returns:
    --------
    APVs: DataFrame
        Single column "Holland" with the adjusted p-values, indexed by
        "<control> vs <algorithm>" labels in ascending p-value order.

    Raises:
    -------
    ValueError
        If ``control`` is None, if it is a name that is not among the
        algorithms, or if ``p_values`` is not two-dimensional.
    """
    # Initial checking. isinstance (not an exact type match) so that
    # subclasses and plain nested lists are accepted as well.
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array([f"Alg{alg}" for alg in range(p_values.shape[1])])

    if control is None:
        raise ValueError("Initialization ERROR. Incorrect value for control.")
    if isinstance(control, str):
        # An unknown name is reported explicitly instead of failing inside
        # an array-to-int conversion.
        try:
            control = list(algorithms).index(control)
        except ValueError:
            raise ValueError("Initialization ERROR. Incorrect value for control.") from None

    k = p_values.shape[1]
    # Sort p-values p(0) <= p(1) <= ... and keep the k-1 smallest.
    order = np.argsort(p_values[0, :])[: k - 1]
    sorted_p = np.asarray(p_values[0, order], dtype=float)

    control_name = algorithms[control]
    comparison = [f"{control_name} vs {algorithms[j]}" for j in order]

    # The exponent of each term depends only on its own rank, so the
    # "max over the first i+1 terms" is a running maximum.
    exponents = k - 1 - np.arange(sorted_p.size)
    stepwise = 1 - (1 - sorted_p) ** exponents
    adjusted = np.minimum(np.maximum.accumulate(stepwise), 1)
    return pd.DataFrame(data=adjusted.reshape(-1, 1), index=comparison, columns=["Holland"])
def finner(p_values, control):
    """
    Finner's procedure for the adjusted p-value computation.

    Only the first row of ``p_values`` is read. With the entries sorted in
    ascending order, the i-th adjusted value is the running maximum of
    ``1 - (1 - p(j)) ** ((k - 1) / (j + 1))`` for ``j <= i``, capped at 1,
    where ``k`` is the number of columns. The largest entry is left out
    (presumably the control's own p-value -- confirm against the caller).

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame
        P-values obtained from a ranking test, one column per algorithm.
    control: int or string
        Index or name of the control algorithm.

    Returns:
    --------
    APVs: DataFrame
        Single column "Finner" with the adjusted p-values, indexed by
        "<control> vs <algorithm>" labels in ascending p-value order.

    Raises:
    -------
    ValueError
        If ``control`` is None, if it is a name that is not among the
        algorithms, or if ``p_values`` is not two-dimensional.
    """
    # Initial checking. isinstance (not an exact type match) so that
    # subclasses and plain nested lists are accepted as well.
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array([f"Alg{alg}" for alg in range(p_values.shape[1])])

    if control is None:
        raise ValueError("Initialization ERROR. Incorrect value for control.")
    if isinstance(control, str):
        # An unknown name is reported explicitly instead of failing inside
        # an array-to-int conversion.
        try:
            control = list(algorithms).index(control)
        except ValueError:
            raise ValueError("Initialization ERROR. Incorrect value for control.") from None

    k = p_values.shape[1]
    # Sort p-values p(0) <= p(1) <= ... and keep the k-1 smallest.
    order = np.argsort(p_values[0, :])[: k - 1]
    sorted_p = np.asarray(p_values[0, order], dtype=float)

    control_name = algorithms[control]
    comparison = [f"{control_name} vs {algorithms[j]}" for j in order]

    # The exponent of each term depends only on its own rank, so the
    # "max over the first i+1 terms" is a running maximum.
    exponents = float(k - 1) / (np.arange(sorted_p.size) + 1)
    stepwise = 1 - (1 - sorted_p) ** exponents
    adjusted = np.minimum(np.maximum.accumulate(stepwise), 1)
    return pd.DataFrame(data=adjusted.reshape(-1, 1), index=comparison, columns=["Finner"])
def hochberg(p_values, control):
    """
    Hochberg's procedure for the adjusted p-value computation.

    Only the first row of ``p_values`` is read. With the entries sorted in
    ascending order, the i-th adjusted value is the maximum of
    ``(k - 1 - j) * p(j)`` over all ranks ``j >= i``, capped at 1, where
    ``k`` is the number of columns. The largest entry gets weight 0 and is
    not reported (presumably the control's own p-value -- confirm against
    the caller).

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame
        P-values obtained from a ranking test, one column per algorithm.
    control: int or string
        Index or name of the control algorithm.

    Returns:
    --------
    APVs: DataFrame
        Single column "Hochberg" with the adjusted p-values, indexed by
        "<control> vs <algorithm>" labels in ascending p-value order.

    Raises:
    -------
    ValueError
        If ``control`` is None, if it is a name that is not among the
        algorithms, or if ``p_values`` is not two-dimensional.
    """
    # Initial checking. isinstance (not an exact type match) so that
    # subclasses and plain nested lists are accepted as well.
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array([f"Alg{alg}" for alg in range(p_values.shape[1])])

    if control is None:
        raise ValueError("Initialization ERROR. Incorrect value for control.")
    if isinstance(control, str):
        # An unknown name is reported explicitly instead of failing inside
        # an array-to-int conversion.
        try:
            control = list(algorithms).index(control)
        except ValueError:
            raise ValueError("Initialization ERROR. Incorrect value for control.") from None

    k = p_values.shape[1]
    # Sort p-values p(0) <= p(1) <= ... <= p(k-1)
    order = np.argsort(p_values[0, :])
    sorted_p = np.asarray(p_values[0, order], dtype=float)

    control_name = algorithms[control]
    comparison = [f"{control_name} vs {algorithms[j]}" for j in order[: k - 1]]

    # Weights k-1, k-2, ..., 0 in ordinary integer arithmetic (a uint8 index
    # would wrap around once there are more than 255 algorithms).
    weighted = sorted_p * (k - 1 - np.arange(k))
    # Maximum over all ranks at or above i == reversed running maximum.
    suffix_max = np.maximum.accumulate(weighted[::-1])[::-1]
    adjusted = np.minimum(suffix_max[: k - 1], 1)
    return pd.DataFrame(data=adjusted.reshape(-1, 1), index=comparison, columns=["Hochberg"])
def li(p_values, control):
    """
    Li's procedure for the adjusted p-value computation.

    Only the first row of ``p_values`` is read. With the entries sorted in
    ascending order and ``p_max`` the second-largest entry of the row, each
    of the ``k - 1`` smallest p-values is adjusted as
    ``min(p_max, p / (p + 1 - p_max))``. The largest entry of the row is
    left out (presumably the control's own p-value -- confirm against the
    caller).

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame
        P-values obtained from a ranking test, one column per algorithm.
    control: int or string
        Index or name of the control algorithm. It is required: passing
        None raises ValueError.

    Returns:
    --------
    APVs: DataFrame
        Single column "Li" with the adjusted p-values, indexed by
        "<control> vs <algorithm>" labels in ascending p-value order.

    Raises:
    -------
    ValueError
        If ``control`` is None, if it is a name that is not among the
        algorithms, or if ``p_values`` is not two-dimensional.
    """
    # Initial checking. isinstance (not an exact type match) so that
    # subclasses and plain nested lists are accepted as well.
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array([f"Alg{alg}" for alg in range(p_values.shape[1])])

    if control is None:
        raise ValueError("Initialization ERROR. Incorrect value for control.")
    if isinstance(control, str):
        # An unknown name is reported explicitly instead of failing inside
        # an array-to-int conversion.
        try:
            control = list(algorithms).index(control)
        except ValueError:
            raise ValueError("Initialization ERROR. Incorrect value for control.") from None

    k = p_values.shape[1]
    # Sort p-values p(0) <= p(1) <= ... and keep the k-1 smallest.
    order = np.argsort(p_values[0, :])[: k - 1]
    sorted_p = np.asarray(p_values[0, order], dtype=float)

    control_name = algorithms[control]
    comparison = [f"{control_name} vs {algorithms[j]}" for j in order]

    if sorted_p.size:
        # Last kept value == second-largest entry of the whole row.
        p_max = sorted_p[-1]
        adjusted = np.minimum(p_max, sorted_p / (sorted_p + 1 - p_max))
    else:
        # A single algorithm leaves nothing to compare.
        adjusted = sorted_p
    return pd.DataFrame(data=adjusted.reshape(-1, 1), index=comparison, columns=["Li"])
def holm(p_values, control=None):
    """
    Holm's procedure for the adjusted p-value computation.

    With a control, only the first row of ``p_values`` is read: its entries
    are sorted in ascending order, the largest one is left out, and the
    i-th adjusted value is the running maximum of ``(k - 1 - j) * p(j)``
    for ``j <= i``, capped at 1 (``k`` = number of columns).

    Without a control, the strict upper triangle of ``p_values`` supplies
    the ``m = k * (k - 1) / 2`` pairwise p-values; after sorting, the i-th
    adjusted value is the running maximum of ``(m - j) * p(j)``, capped
    at 1.

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame
        P-values obtained from a ranking test, one column per algorithm.
    control: optional int or string. Default None
        Index or name of the control algorithm. If control is provided,
        control vs all comparisons are considered, else all vs all.

    Returns:
    --------
    APVs: DataFrame
        Single column "Holm" with the adjusted p-values, indexed by
        "<algorithm> vs <algorithm>" labels in ascending p-value order.

    Raises:
    -------
    ValueError
        If ``control`` is a name that is not among the algorithms, if it is
        neither None, a string nor an integer, or if ``p_values`` is not
        two-dimensional.
    """
    # Initial checking. isinstance (not an exact type match) so that
    # subclasses and plain nested lists are accepted as well.
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        if p_values.ndim != 2:
            raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
        algorithms = np.array([f"Alg{alg}" for alg in range(p_values.shape[1])])

    if isinstance(control, str):
        # An unknown name is reported explicitly instead of failing inside
        # an array-to-int conversion.
        try:
            control = list(algorithms).index(control)
        except ValueError:
            raise ValueError("Initialization ERROR. Incorrect value for control.") from None

    k = p_values.shape[1]
    if control is None:
        # All vs all: one hypothesis per unordered pair of algorithms.
        rows, cols = np.triu_indices(k, 1)
        pair_pvals = p_values[rows, cols]
        order = np.argsort(pair_pvals)
        sorted_p = np.asarray(pair_pvals[order], dtype=float)
        weights = sorted_p.size - np.arange(sorted_p.size)
        comparison = [f"{algorithms[rows[j]]} vs {algorithms[cols[j]]}" for j in order]
    elif isinstance(control, (int, np.integer)):
        # Control vs all. NumPy integers are accepted as well as Python ints.
        order = np.argsort(p_values[0, :])[: k - 1]
        sorted_p = np.asarray(p_values[0, order], dtype=float)
        weights = k - 1 - np.arange(sorted_p.size)
        control_name = algorithms[control]
        comparison = [f"{control_name} vs {algorithms[j]}" for j in order]
    else:
        # Any other control type cannot select an algorithm.
        raise ValueError("Initialization ERROR. Incorrect value for control.")

    # Each weight depends only on the rank, so the step-down maximum over
    # the first i+1 terms is a running maximum.
    adjusted = np.minimum(np.maximum.accumulate(weights * sorted_p), 1)
    return pd.DataFrame(data=adjusted.reshape(-1, 1), index=comparison, columns=["Holm"])
def shaffer(p_values):
    """
    Shaffer's procedure for adjusted p-value computation.

    The strict upper triangle of the square matrix ``p_values`` supplies
    the ``m = k * (k - 1) / 2`` pairwise p-values. After sorting them in
    ascending order, the i-th one is multiplied by ``t(i)``, the largest
    possible number of true hypotheses not exceeding ``m - i + 1`` (with
    ``t(1) = m``); the adjusted value is the running maximum of those
    products, capped at 1.

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame
        Square matrix containing the pairwise p-values.

    Returns:
    --------
    APVs: DataFrame
        Single column "Shaffer" with the adjusted p-values, indexed by
        "<algorithm> vs <algorithm>" labels in ascending p-value order.

    Raises:
    -------
    ValueError
        If ``p_values`` is not a two-dimensional square matrix.
    """

    def S(k):
        """
        Computes the set of possible numbers of true hypotheses.

        Parameters:
        -----------
        k: int
            Number of algorithms being compared.

        Returns:
        --------
        TrueSet: list of int
            Possible numbers of true hypotheses, in ascending order
            (always starting with 0).
        """
        # counts[n] holds the set for n algorithms. It is built bottom-up so
        # that every sub-problem is solved once, using the recurrence
        # S(n) = union over j = 1..n of { C(j, 2) + x : x in S(n - j) }.
        counts = [{0}, {0}]
        for n in range(2, k + 1):
            current = set()
            for j in range(1, n + 1):
                pairs_in_group = j * (j - 1) // 2  # C(j, 2)
                current.update(pairs_in_group + x for x in counts[n - j])
            counts.append(current)
        # Sorted explicitly: set iteration order is not guaranteed to be
        # ascending, and the caller relies on that order.
        return sorted(counts[max(k, 0)] if k < len(counts) else counts[-1])

    # Initial checking. isinstance (not an exact type match) so that
    # subclasses and plain nested lists are accepted as well.
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        algorithms = None

    if p_values.ndim != 2:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    elif p_values.shape[0] != p_values.shape[1]:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")

    if algorithms is None:
        # Default names are generated only after the shape is validated.
        algorithms = np.array([f"Alg{alg}" for alg in range(p_values.shape[1])])

    # Define parameters
    k = p_values.shape[0]
    s = np.array(S(k)[1:], dtype=int)  # drop the leading 0

    # Sort p-values p(0) <= p(1) <= ... <= p(m-1)
    rows, cols = np.triu_indices(k, 1)
    pair_pvals = p_values[rows, cols]
    order = np.argsort(pair_pvals)
    sorted_p = np.asarray(pair_pvals[order], dtype=float)

    # Compute t: the first hypothesis gets s[-1] (== m); afterwards each
    # value s[j] is used for (s[j+1] - s[j]) consecutive positions, in
    # descending order. s is ascending, so reversing the repeat is enough.
    t = np.concatenate((s[-1:], np.repeat(s[:-1], np.diff(s))[::-1]))

    # Adjust p-values
    comparison = [f"{algorithms[rows[j]]} vs {algorithms[cols[j]]}" for j in order]
    adjusted = np.minimum(np.maximum.accumulate(sorted_p * t), 1)
    return pd.DataFrame(data=adjusted.reshape(-1, 1), index=comparison, columns=["Shaffer"])
def nemenyi(p_values):
    """
    Nemenyi's procedure for adjusted p-value computation.

    The strict upper triangle of the square matrix ``p_values`` supplies
    the ``m = k * (k - 1) / 2`` pairwise p-values. Each one is adjusted as
    ``min(m * p, 1)`` and reported in ascending p-value order.

    Parameters:
    -----------
    p_values: 2-D array-like or DataFrame
        Square matrix containing the pairwise p-values.

    Returns:
    --------
    APVs: DataFrame
        Single column "Nemenyi" with the adjusted p-values, indexed by
        "<algorithm> vs <algorithm>" labels in ascending p-value order.

    Raises:
    -------
    ValueError
        If ``p_values`` is not a two-dimensional square matrix.
    """
    # Initial checking. isinstance (not an exact type match) so that
    # subclasses and plain nested lists are accepted as well.
    if isinstance(p_values, pd.DataFrame):
        algorithms = p_values.columns
        p_values = p_values.values
    else:
        p_values = np.asarray(p_values)
        algorithms = None

    if p_values.ndim != 2:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")
    elif p_values.shape[0] != p_values.shape[1]:
        raise ValueError("Initialization ERROR. Incorrect number of array dimensions.")

    if algorithms is None:
        # Default names are generated only after the shape is validated, so
        # a 1-D input reaches the ValueError above instead of an IndexError.
        algorithms = np.array([f"Alg{alg}" for alg in range(p_values.shape[1])])

    # Define parameters
    k = p_values.shape[0]
    m = k * (k - 1) // 2

    # Sort p-values p(0) <= p(1) <= ... <= p(m-1)
    rows, cols = np.triu_indices(k, 1)
    pair_pvals = p_values[rows, cols]
    order = np.argsort(pair_pvals)
    sorted_p = np.asarray(pair_pvals[order], dtype=float)

    # Adjust p-values
    comparison = [f"{algorithms[rows[j]]} vs {algorithms[cols[j]]}" for j in order]
    adjusted = np.minimum(sorted_p * m, 1)
    return pd.DataFrame(data=adjusted.reshape(-1, 1), index=comparison, columns=["Nemenyi"])