Source code for jmetal.lab.statistical_test.critical_distance

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from statsmodels.stats.libqsturng import qsturng

from jmetal.lab.statistical_test.functions import ranks



[docs]
def NemenyiCD(alpha: float, num_alg, num_dataset):
    """Computes Nemenyi's critical difference:
    * CD = q_alpha * sqrt(num_alg*(num_alg + 1)/(6*num_prob))
    where q_alpha is the critical value, of the Studentized range statistic divided by sqrt(2).
    :param alpha: {0.1, 0.999}. Significance level.
    :param num_alg: number of tested algorithms.
    :param num_dataset: Number of problems/datasets where the algorithms have been tested.
    """

    # get critical value
    q_alpha = qsturng(p=1 - alpha, r=num_alg, v=num_alg * (num_dataset - 1)) / np.sqrt(2)

    # compute the critical difference
    cd = q_alpha * np.sqrt(num_alg * (num_alg + 1) / (6.0 * num_dataset))

    return cd




[docs]
def CDplot(
    results,
    alpha: float = 0.05,
    higher_is_better: bool = False,
    alg_names: list = None,
    output_filename: str = "cdplot.eps",
):
    """CDgraph plots the critical difference graph show in Janez Demsar's 2006 work:
    * Statistical Comparisons of Classifiers over Multiple Data Sets.
    :param results: A 2-D array containing results from each algorithm. Each row of 'results' represents an algorithm, and each column a dataset.
    :param alpha: {0.1, 0.999}. Significance level for the critical difference.
    :param alg_names: Names of the tested algorithms.
    """

    def _join_alg(avranks, num_alg, cd):
        """
        join_alg returns the set of non significant methods
        """

        # get all pairs
        sets = (-1) * np.ones((num_alg, 2))
        for i in range(num_alg):
            elements = np.where(np.logical_and(avranks - avranks[i] > 0, avranks - avranks[i] < cd))[0]
            if elements.size > 0:
                sets[i, :] = [avranks[i], avranks[elements[-1]]]
        sets = np.delete(sets, np.where(sets[:, 0] < 0)[0], axis=0)

        # group pairs
        group = sets[0, :]
        for i in range(1, sets.shape[0]):
            if sets[i - 1, 1] < sets[i, 1]:
                group = np.vstack((group, sets[i, :]))

        return group

    # Initial Checking
    if type(results) == pd.DataFrame:
        alg_names = results.index
        results = results.values
    elif type(results) == np.ndarray and alg_names is None:
        alg_names = np.array(["Alg%d" % alg for alg in range(results.shape[1])])

    if results.ndim == 2:
        num_alg, num_dataset = results.shape
    else:
        raise ValueError("Initialization ERROR: In CDplot(...) results must be 2-D array")

    # Get the critical difference
    cd = NemenyiCD(alpha, num_alg, num_dataset)

    # Compute ranks. (ranks[i][j] rank of the i-th algorithm on the j-th problem.)

    rranks = ranks(results.T, descending=higher_is_better)

    # Compute for each algorithm the ranking averages.
    avranks = np.transpose(np.mean(rranks, axis=0))
    indices = np.argsort(avranks).astype(np.uint8)
    avranks = avranks[indices]

    # Split algorithms.
    spoint = np.round(num_alg / 2.0).astype(np.uint8)
    leftalg = avranks[:spoint]
    rightalg = avranks[spoint:]
    rows = np.ceil(num_alg / 2.0).astype(np.uint8)

    # Figure settings.
    highest = np.ceil(np.max(avranks)).astype(np.uint8)  # highest shown rank
    lowest = np.floor(np.min(avranks)).astype(np.uint8)  # lowest shown rank
    width = 6  # default figure width (in inches)
    height = 0.575 * (rows + 1)  # figure height

    """
                        FIGURE
      (1,0)
        +-----+---------------------------+-------+
        |     |                           |       |
        |     |                           |       |
        |     |                           |       |
        +-----+---------------------------+-------+ stop
        |     |                           |       |
        |     |                           |       |
        |     |                           |       |
        |     |                           |       |
        |     |                           |       |
        |     |                           |       |
        +-----+---------------------------+-------+ sbottom
        |     |                           |       |
        +-----+---------------------------+-------+
            sleft                       sright     (0,1)
    """

    stop, sbottom, sleft, sright = 0.65, 0.1, 0.15, 0.85

    # main horizontal axis length
    lline = sright - sleft

    # Initialize figure
    fig = plt.figure(figsize=(width, height), facecolor="white")
    ax = fig.add_axes([0, 0, 1, 1])
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_axis_off()

    # Main horizontal axis
    ax.hlines(stop, sleft, sright, color="black", linewidth=0.7)
    for xi in range(highest - lowest + 1):
        # Plot mayor ticks
        ax.vlines(
            x=sleft + (lline * xi) / (highest - lowest), ymin=stop, ymax=stop + 0.05, color="black", linewidth=0.7
        )
        # Mayor ticks labels
        ax.text(
            x=sleft + (lline * xi) / (highest - lowest), y=stop + 0.06, s=str(lowest + xi), ha="center", va="bottom"
        )
        # Minor ticks
        if xi < highest - lowest:
            ax.vlines(
                x=sleft + (lline * (xi + 0.5)) / (highest - lowest),
                ymin=stop,
                ymax=stop + 0.025,
                color="black",
                linewidth=0.7,
            )

    # Plot lines/names for left models
    vspace = 0.5 * (stop - sbottom) / (spoint + 1)
    for i in range(spoint):
        ax.vlines(
            x=sleft + (lline * (leftalg[i] - lowest)) / (highest - lowest),
            ymin=sbottom + (spoint - 1 - i) * vspace,
            ymax=stop,
            color="black",
            linewidth=0.7,
        )
        ax.hlines(
            y=sbottom + (spoint - 1 - i) * vspace,
            xmin=sleft,
            xmax=sleft + (lline * (leftalg[i] - lowest)) / (highest - lowest),
            color="black",
            linewidth=0.7,
        )
        ax.text(x=sleft - 0.01, y=sbottom + (spoint - 1 - i) * vspace, s=alg_names[indices][i], ha="right", va="center")

    # Plot lines/names for right models
    vspace = 0.5 * (stop - sbottom) / (num_alg - spoint + 1)
    for i in range(num_alg - spoint):
        ax.vlines(
            x=sleft + (lline * (rightalg[i] - lowest)) / (highest - lowest),
            ymin=sbottom + i * vspace,
            ymax=stop,
            color="black",
            linewidth=0.7,
        )
        ax.hlines(
            y=sbottom + i * vspace,
            xmin=sleft + (lline * (rightalg[i] - lowest)) / (highest - lowest),
            xmax=sright,
            color="black",
            linewidth=0.7,
        )
        ax.text(x=sright + 0.01, y=sbottom + i * vspace, s=alg_names[indices][spoint + i], ha="left", va="center")

    # Plot critical difference rule
    if sleft + (cd * lline) / (highest - lowest) <= sright:
        ax.hlines(y=stop + 0.2, xmin=sleft, xmax=sleft + (cd * lline) / (highest - lowest), linewidth=1.5)
        ax.text(
            x=sleft + 0.5 * (cd * lline) / (highest - lowest), y=stop + 0.21, s="CD=%.3f" % cd, ha="center", va="bottom"
        )
    else:
        ax.text(x=(sleft + sright) / 2, y=stop + 0.2, s="CD=%.3f" % cd, ha="center", va="bottom")

    # Get pair of non-significant methods
    nonsig = _join_alg(avranks, num_alg, cd)
    if nonsig.ndim == 2:
        if nonsig.shape[0] == 2:
            left_lines = np.reshape(nonsig[0, :], (1, 2))
            right_lines = np.reshape(nonsig[1, :], (1, 2))
        else:
            left_lines = nonsig[: np.round(nonsig.shape[0] / 2.0).astype(np.uint8), :]
            right_lines = nonsig[np.round(nonsig.shape[0] / 2.0).astype(np.uint8) :, :]
    else:
        left_lines = np.reshape(nonsig, (1, nonsig.shape[0]))

    # plot from the left
    vspace = 0.5 * (stop - sbottom) / (left_lines.shape[0] + 1)
    for i in range(left_lines.shape[0]):
        ax.hlines(
            y=stop - (i + 1) * vspace,
            xmin=sleft + lline * (left_lines[i, 0] - lowest - 0.025) / (highest - lowest),
            xmax=sleft + lline * (left_lines[i, 1] - lowest + 0.025) / (highest - lowest),
            linewidth=2,
        )

    # plot from the rigth
    if nonsig.ndim == 2:
        vspace = 0.5 * (stop - sbottom) / (left_lines.shape[0])
        for i in range(right_lines.shape[0]):
            ax.hlines(
                y=stop - (i + 1) * vspace,
                xmin=sleft + lline * (right_lines[i, 0] - lowest - 0.025) / (highest - lowest),
                xmax=sleft + lline * (right_lines[i, 1] - lowest + 0.025) / (highest - lowest),
                linewidth=2,
            )

    plt.savefig(output_filename, bbox_inches="tight")
    plt.show()
Table Of Contents

Source code for jmetal.lab.statistical_test.critical_distance