Source code for jmetal.lab.experiment

import io
import os
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
from statistics import median
from typing import List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu, iqr, ks_2samp

from jmetal.core.algorithm import Algorithm
from jmetal.core.quality_indicator import QualityIndicator
from jmetal.logger import get_logger
from jmetal.util.solution import (
    print_function_values_to_file,
    print_variables_to_file,
    read_solutions,
)

logger = get_logger(__name__)

"""
.. module:: laboratory
   :platform: Unix, Windows
   :synopsis: Run experiments. WIP!

.. moduleauthor:: Antonio Benítez-Hidalgo <antonio.b@uma.es>
"""


class Job:
    """One run of an algorithm on a problem, labelled by tags and a run number."""

    def __init__(self, algorithm: Algorithm, algorithm_tag: str, problem_tag: str, run: int):
        """Keep the algorithm together with the labels used to name its output.

        :param algorithm: Algorithm instance to execute.
        :param algorithm_tag: Label of the algorithm.
        :param problem_tag: Label of the problem.
        :param run: Run identifier; it is embedded in the generated file names.
        """
        self.algorithm, self.algorithm_tag = algorithm, algorithm_tag
        self.problem_tag, self.run_tag = problem_tag, run

    def execute(self, output_path: str = ""):
        """Run the algorithm and, when ``output_path`` is given, dump its results there.

        Three files are written: ``FUN.<run>.tsv`` (function values),
        ``VAR.<run>.tsv`` (variables) and ``TIME.<run>`` (total computing time).

        :param output_path: Directory for the result files. An empty value means
            the algorithm is run but nothing is written.
        """
        self.algorithm.run()

        if not output_path:
            return

        def target(pattern: str) -> str:
            # Every result file lives in output_path and carries the run tag
            return os.path.join(output_path, pattern.format(self.run_tag))

        print_function_values_to_file(self.algorithm.result(), filename=target("FUN.{}.tsv"))
        print_variables_to_file(self.algorithm.result(), filename=target("VAR.{}.tsv"))

        with open(target("TIME.{}"), "w+") as time_file:
            time_file.write(str(self.algorithm.total_computing_time))

    def get_algorithm_data(self):
        """Return whatever the algorithm reports through ``observable_data()``."""
        return self.algorithm.observable_data()
class Experiment:
    """Execute a list of jobs and gather the data each one reports."""

    def __init__(self, output_dir: str, jobs: List["Job"], m_workers: int = 6):
        """Run an experiment to execute a list of jobs.

        :param output_dir: Base directory where each job will save its results.
        :param jobs: List of :py:class:`Job` instances to be executed.
        :param m_workers: Maximum number of workers. Kept for interface
            compatibility; see :py:meth:`run` for how jobs are actually executed.
        """
        self.jobs = jobs
        self.m_workers = m_workers
        self.output_dir = output_dir
        # Filled by run(): one entry per executed job, in job order
        self.job_data = []

    def run(self) -> None:
        """Execute every job and append its observable data to ``self.job_data``.

        Each job writes its files to ``<output_dir>/<algorithm_tag>/<problem_tag>``.

        Jobs are executed one after another in the calling process. The previous
        code did the same in practice: it called ``job.execute(output_path)``
        itself and handed the *return value* (``None``) to
        ``executor.submit``, so the pool only received a non-callable work item
        whose failure was silently discarded. That bogus submission is dropped.

        Jobs are intentionally not dispatched to worker processes here: a worker
        would run a pickled copy of the job, so the ``job`` object held by the
        caller would never be run and ``get_algorithm_data()`` would describe an
        algorithm that has not been executed.
        """
        for job in self.jobs:
            output_path = os.path.join(self.output_dir, job.algorithm_tag, job.problem_tag)
            job.execute(output_path)
            self.job_data.append(job.get_algorithm_data())
def generate_summary_from_experiment(
    input_dir: str, quality_indicators: List["QualityIndicator"], reference_fronts: str = ""
):
    """Compute a list of quality indicators and write them to ``QualityIndicatorSummary.csv``.

    The input data directory *must* meet the following structure (this is generated
    automatically by the Experiment class):

    * <base_dir>

      * algorithm_a

        * problem_a

          * FUN.0.tsv
          * FUN.1.tsv
          * VAR.0.tsv
          * VAR.1.tsv
          * ...

    The algorithm and problem names are taken from the last two components of the
    directory that contains each file. Files whose name contains ``TIME`` produce a
    ``Time`` row; files whose name contains ``FUN`` produce one row per indicator.
    The summary is written to the current working directory.

    :param input_dir: Directory where all the input data is found (function values and variables).
    :param reference_fronts: Directory where reference fronts are found
        (one ``<problem>.pf`` file per problem).
    :param quality_indicators: List of quality indicators to compute.
    :return: None.
    """
    quality_indicators = quality_indicators or []

    # A single handle is kept open instead of reopening the file for every row
    with open("QualityIndicatorSummary.csv", "w") as summary:
        summary.write("Algorithm,Problem,ExecutionId,IndicatorName,IndicatorValue\n")

        for dirname, _, filenames in os.walk(input_dir):
            for filename in filenames:
                is_time, is_fun = "TIME" in filename, "FUN" in filename
                if not (is_time or is_fun):
                    continue

                # Path.parts copes with the separator of the running platform,
                # so no try/except over "/" and "\\" is needed
                location = Path(dirname).parts[-2:]
                if len(location) < 2:
                    logger.warning("Skipping %s: expected <algorithm>/<problem> directories", dirname)
                    continue
                algorithm, problem = location

                # The run tag is the last purely numeric dot-separated token
                numeric_tokens = [token for token in filename.split(".") if token.isdigit()]
                if not numeric_tokens:
                    logger.warning("Skipping %s: no run identifier in file name", filename)
                    continue
                run_tag = numeric_tokens[-1]

                if is_time:
                    with open(os.path.join(dirname, filename), "r") as content_file:
                        content = content_file.read()

                    summary.write(",".join([algorithm, problem, run_tag, "Time", str(content)]))
                    summary.write("\n")

                if is_fun:
                    solutions = read_solutions(os.path.join(dirname, filename))
                    objectives = [solution.objectives for solution in solutions]
                    reference_front_file = os.path.join(reference_fronts, problem + ".pf")

                    for indicator in quality_indicators:
                        # Add reference front if the indicator takes one
                        if hasattr(indicator, "reference_front"):
                            if Path(reference_front_file).is_file():
                                with open(reference_front_file) as front_file:
                                    indicator.reference_front = [
                                        [float(x) for x in line.split()] for line in front_file
                                    ]
                            else:
                                # The placeholder is required: logging %-formats its arguments
                                logger.warning("Reference front not found at %s", reference_front_file)

                        result = indicator.compute(objectives)

                        # Save quality indicator value to file
                        summary.write(
                            ",".join([algorithm, problem, run_tag, indicator.get_short_name(), str(result)])
                        )
                        summary.write("\n")
def _read_summary(filename: str) -> pd.DataFrame:
    """Load a quality-indicator summary CSV and check it has five distinct columns.

    :param filename: Input filename (summary).
    :return: The summary as a data frame.
    :raises Exception: If the number of distinct columns is not five.
    """
    df = pd.read_csv(filename, skipinitialspace=True)

    if len(set(df.columns.tolist())) != 5:
        raise Exception("Wrong number of columns")

    return df


def _prepare_output_dir(output_dir: str) -> None:
    """Create ``output_dir`` if missing; otherwise delete the files it contains.

    Sub-directories are left untouched (removing them as files would fail).

    :param output_dir: Output path.
    """
    directory = Path(output_dir)

    if directory.is_dir():
        logger.warning("Directory {} exists. Removing contents.".format(output_dir))
        for entry in directory.iterdir():
            if not entry.is_dir():
                entry.unlink()
    else:
        logger.warning("Directory {} does not exist. Creating it.".format(output_dir))
        directory.mkdir(parents=True)


def _indicator_table(table: pd.DataFrame, indicator_name: str) -> pd.DataFrame:
    """Return the rows of ``table`` for one indicator, indexed by problem only."""
    return table.xs(indicator_name, level="IndicatorName")


def generate_boxplot(filename: str, output_dir: str = "boxplot"):
    """Generate boxplot diagrams.

    One figure is produced per (indicator, problem) pair, with one box per
    algorithm, and saved both as ``.png`` and ``.eps``.

    :param filename: Input filename (summary).
    :param output_dir: Output path.
    """
    df = _read_summary(filename)
    _prepare_output_dir(output_dir)

    algorithms = pd.unique(df["Algorithm"])
    problems = pd.unique(df["Problem"])
    indicators = pd.unique(df["IndicatorName"])

    for indicator_name in indicators:
        data = df[df["IndicatorName"] == indicator_name]

        for pr in problems:
            data_to_plot = [
                data["IndicatorValue"][np.logical_and(data["Algorithm"] == alg, data["Problem"] == pr)]
                for alg in algorithms
            ]

            # Create a figure instance
            fig = plt.figure(1, figsize=(9, 6))
            plt.suptitle(pr, y=0.95, fontsize=18)

            ax = fig.add_subplot(111)
            ax.boxplot(data_to_plot)
            ax.set_xticklabels(algorithms)
            ax.tick_params(labelsize=20)

            for extension in ("png", "eps"):
                plt.savefig(
                    os.path.join(output_dir, "boxplot-{}-{}.{}".format(pr, indicator_name, extension)),
                    bbox_inches="tight",
                )
            plt.close(fig)


def generate_latex_tables(filename: str, output_dir: str = "latex/statistical"):
    """Computes a number of statistical values (mean, median, standard deviation, interquartile range).

    For every indicator four tab-separated tables (``Median``, ``IQR``, ``Mean``,
    ``Std``; problems as rows, algorithms as columns) and two LaTeX documents
    (``MedianIQR`` and ``MeanStd``) are written to ``output_dir``.

    :param filename: Input filename (summary).
    :param output_dir: Output path.
    """
    df = _read_summary(filename)
    _prepare_output_dir(output_dir)

    # One column per algorithm for each statistic; the columns are joined once at the end
    per_algorithm = {"Median": [], "IQR": [], "Mean": [], "Std": []}

    for algorithm_name, subset in df.groupby("Algorithm", sort=False):
        subset = subset.drop("Algorithm", axis=1)
        subset = subset.rename(columns={"IndicatorValue": algorithm_name})
        subset = subset.set_index(["Problem", "IndicatorName", "ExecutionId"])
        grouped = subset.groupby(level=[0, 1])

        per_algorithm["Median"].append(grouped.median())
        per_algorithm["IQR"].append(grouped.quantile(0.75) - grouped.quantile(0.25))
        per_algorithm["Mean"].append(grouped.mean())
        per_algorithm["Std"].append(grouped.std())

    tables = {statistic: pd.concat(frames, axis=1) for statistic, frames in per_algorithm.items()}

    # (file prefix, central tendency, dispersion, caption template)
    reports = (
        ("MedianIQR", "Median", "IQR", "Median and Interquartile Range of the {} quality indicator."),
        ("MeanStd", "Mean", "Std", "Mean and Standard Deviation of the {} quality indicator."),
    )

    for indicator_name in pd.unique(df["IndicatorName"]):
        # Plain tables
        for statistic, table in tables.items():
            _indicator_table(table, indicator_name).to_csv(
                os.path.join(output_dir, "{}-{}.csv".format(statistic, indicator_name)),
                sep="\t",
                encoding="utf-8",
            )

        # LaTeX tables
        for prefix, central, dispersion, caption in reports:
            with open(os.path.join(output_dir, "{}-{}.tex".format(prefix, indicator_name)), "w") as latex:
                latex.write(
                    __averages_to_latex(
                        _indicator_table(tables[central], indicator_name),
                        _indicator_table(tables[dispersion], indicator_name),
                        caption=caption.format(indicator_name),
                        minimization=check_minimization(indicator_name),
                        label="table:{}".format(indicator_name),
                    )
                )


def compute_wilcoxon(filename: str, output_dir: str = "latex/wilcoxon"):
    """Compare every pair of algorithms with the Mann-Whitney rank sum test.

    For each indicator an upper-triangular table (rows: all algorithms but the
    last, columns: all but the first) is written as ``.csv`` and ``.tex``. Each
    cell holds one symbol per problem: ``+`` when the difference is significant
    (p <= 0.05) and the row algorithm has the better median, ``o`` when it is
    significant and the column algorithm is better, ``-`` otherwise.

    :param filename: Input filename (summary).
    :param output_dir: Output path.
    """
    df = _read_summary(filename)
    _prepare_output_dir(output_dir)

    algorithms = pd.unique(df["Algorithm"])
    problems = pd.unique(df["Problem"])
    indicators = pd.unique(df["IndicatorName"])

    table = pd.DataFrame(index=algorithms[0:-1], columns=algorithms[1:])

    for indicator_name in indicators:
        minimization = check_minimization(indicator_name)
        indicator_rows = df[df["IndicatorName"] == indicator_name]

        for i, row_algorithm in enumerate(algorithms[0:-1]):
            # Cells below the diagonal stay empty
            cells = [""] * i

            for col_algorithm in algorithms[i + 1:]:
                symbols = []

                for problem in problems:
                    problem_rows = indicator_rows[indicator_rows["Problem"] == problem]
                    data1 = problem_rows.loc[problem_rows["Algorithm"] == row_algorithm, "IndicatorValue"]
                    data2 = problem_rows.loc[problem_rows["Algorithm"] == col_algorithm, "IndicatorValue"]

                    median1 = median(data1)
                    median2 = median(data2)

                    stat, p = mannwhitneyu(data1, data2)

                    if p > 0.05:
                        symbols.append("-")
                    elif minimization:
                        symbols.append("+" if median1 <= median2 else "o")
                    else:
                        symbols.append("+" if median1 >= median2 else "o")

                cells.append("".join(symbols))

            table.loc[row_algorithm] = cells

        table.to_csv(os.path.join(output_dir, "Wilcoxon-{}.csv".format(indicator_name)), sep="\t", encoding="utf-8")

        with open(os.path.join(output_dir, "Wilcoxon-{}.tex".format(indicator_name)), "w") as latex:
            latex.write(
                __wilcoxon_to_latex(
                    table,
                    caption="Wilcoxon values of the {} quality indicator ({}).".format(
                        indicator_name, ", ".join(problems)
                    ),
                    label="table:{}".format(indicator_name),
                )
            )


def compute_mean_indicator(filename: str, indicator_name: str):
    """Compute the mean values of an indicator.

    :param filename: Input filename (summary).
    :param indicator_name: Quality indicator name.
    :return: Data frame with problems as rows and algorithms as columns, both in
        order of first appearance in the summary, holding the mean indicator value
        of each algorithm/problem pair.
    """
    df = _read_summary(filename)

    algorithms = pd.unique(df["Algorithm"])
    problems = pd.unique(df["Problem"])

    # We consider the quality indicator indicator_name
    data = df[df["IndicatorName"] == indicator_name]

    # Compute for each pair algorithm/problem the average of IndicatorValue
    average_values = np.zeros((problems.size, algorithms.size))
    for j, alg in enumerate(algorithms):
        for i, pr in enumerate(problems):
            average_values[i, j] = data["IndicatorValue"][
                np.logical_and(data["Algorithm"] == alg, data["Problem"] == pr)
            ].mean()

    # The former reindex over the frame's own columns did not reorder anything,
    # so the frame is returned as built
    return pd.DataFrame(data=average_values, index=problems, columns=algorithms)
def generate_median_and_wilcoxon_latex_tables(filename: str, output_dir: str = "latex/meansAndWilcoxon"):
    """Generate LaTeX tables including medians and IQRs.

    Additionally, the last algorithm is considered as the reference algorithm, and
    the cells include a symbol indicating whether the differences with the reference
    algorithm are significant or not according to the Wilcoxon rank sum test
    (``+``: significantly better, ``-``: significantly worse, ``=``: no
    significant difference at p <= 0.05).

    :param filename: Input filename (summary).
    :param output_dir: Output path.
    """
    data = pd.read_csv(filename, skipinitialspace=True)

    if len(set(data.columns.tolist())) != 5:
        raise Exception("Wrong number of columns")

    if Path(output_dir).is_dir():
        logger.warning("Directory {} exists. Removing contents.".format(output_dir))
        for file in os.listdir(output_dir):
            os.remove(os.path.join(output_dir, file))
    else:
        logger.warning("Directory {} does not exist. Creating it.".format(output_dir))
        Path(output_dir).mkdir(parents=True)

    algorithms = pd.unique(data["Algorithm"])
    problems = pd.unique(data["Problem"])
    indicators = pd.unique(data["IndicatorName"])

    control_algorithm = algorithms[-1]

    # Compute medians and IQRs
    grouped_values = data.groupby(["Algorithm", "Problem", "IndicatorName"])["IndicatorValue"]
    medians = grouped_values.median()
    iqrs = grouped_values.apply(iqr)

    # Collect the Wilcoxon test results as plain rows; the frame is built once at
    # the end instead of appending row by row through the private DataFrame._append
    rows = []

    for indicator in indicators:
        minimization = check_minimization(indicator)
        indicator_data = data[data["IndicatorName"] == indicator]

        for algorithm in algorithms:
            for problem in problems:
                problem_data = indicator_data[indicator_data["Problem"] == problem]
                algorithm_values = problem_data.loc[problem_data["Algorithm"] == algorithm, "IndicatorValue"]
                ref_values = problem_data.loc[problem_data["Algorithm"] == control_algorithm, "IndicatorValue"]

                stat, p_value = mannwhitneyu(algorithm_values, ref_values)

                algorithm_median = medians.loc[(algorithm, problem, indicator)]
                reference_median = medians.loc[(control_algorithm, problem, indicator)]

                if p_value > 0.05:
                    test_result = "="
                elif minimization:
                    test_result = "+" if algorithm_median <= reference_median else "-"
                else:
                    test_result = "+" if algorithm_median >= reference_median else "-"

                rows.append(
                    {
                        "Indicator": indicator,
                        "Algorithm": algorithm,
                        "Problem": problem,
                        "PValue": p_value,
                        "Median": algorithm_median,
                        "IQR": iqrs.loc[(algorithm, problem, indicator)],
                        "TestResult": test_result,
                    }
                )

    wilcoxon_data = pd.DataFrame(
        rows, columns=["Indicator", "Algorithm", "Problem", "PValue", "Median", "TestResult", "IQR"]
    )

    # Generate LaTeX tables
    caption = (
        "Median and interquartile range (IQR) of the results of the {} quality indicator. "
        + "Cells with dark and light gray background highlights, respectively, the best and second best indicator values. "
        + "The algorithm in the last column is the reference "
        + "algorithm, and the symbols $+$, $-$ and $\\approx$ indicate that the differences with the reference "
        + "algorithm are significantly better, worse, or there is no difference according to the Wilcoxon rank "
        + "sum test (confidence level: 95\\%)."
    )

    for indicator_name in indicators:
        with open(os.path.join(output_dir, "MedianIQRWilcoxon-{}.tex".format(indicator_name)), "w") as latex:
            latex.write(
                __median_wilcoxon_to_latex(
                    indicator_name,
                    wilcoxon_data,
                    caption=caption.format(indicator_name),
                    label="table:{}".format(indicator_name),
                )
            )
def generate_kolmogorov_smirnov_latex_tables(filename: str, output_dir: str = "latex/KolmogorovSmirnov"):
    """Generate LaTeX tables with the results of the Kolmogorov-Smirnov test.

    The last algorithm is considered as the reference algorithm. Every other
    algorithm is compared against it, per problem and indicator, with the
    two-sample Kolmogorov-Smirnov test; the resulting p-values are handed to the
    LaTeX writer.

    :param filename: Input filename (summary).
    :param output_dir: Output path.
    """
    data = pd.read_csv(filename, skipinitialspace=True)

    if len(set(data.columns.tolist())) != 5:
        raise Exception("Wrong number of columns")

    if Path(output_dir).is_dir():
        logger.warning("Directory {} exists. Removing contents.".format(output_dir))
        for file in os.listdir(output_dir):
            os.remove(os.path.join(output_dir, file))
    else:
        logger.warning("Directory {} does not exist. Creating it.".format(output_dir))
        Path(output_dir).mkdir(parents=True)

    algorithms = pd.unique(data["Algorithm"])
    problems = pd.unique(data["Problem"])
    indicators = pd.unique(data["IndicatorName"])

    control_algorithm = algorithms[-1]

    # Collect the test results as plain rows; the frame is built once at the end
    # instead of appending row by row through the private DataFrame._append
    rows = []

    for indicator in indicators:
        indicator_data = data[data["IndicatorName"] == indicator]

        for algorithm in algorithms:
            for problem in problems:
                problem_data = indicator_data[indicator_data["Problem"] == problem]
                algorithm_values = problem_data.loc[problem_data["Algorithm"] == algorithm, "IndicatorValue"]
                ref_values = problem_data.loc[problem_data["Algorithm"] == control_algorithm, "IndicatorValue"]

                stat, p_value = ks_2samp(algorithm_values, ref_values)

                rows.append(
                    {
                        "Indicator": indicator,
                        "Algorithm": algorithm,
                        "Problem": problem,
                        "PValue": p_value,
                        # The test statistic itself is stored as the test result
                        "TestResult": stat,
                    }
                )

    test_data = pd.DataFrame(rows, columns=["Indicator", "Algorithm", "Problem", "PValue", "TestResult"])

    # Generate LaTeX tables
    caption = (
        "Kolmogorov-Smirnov Test of the {} quality indicator. "
        "The algorithm in the last column is the reference "
        "algorithm and each cell contain the p-value obtained when applying the test with the reference "
        "algorithm. Cells with gray background highlight p-values less than 0.05 (i.e., the null hypothesis"
        " -- the two distributions are identical -- is rejected)."
    )

    for indicator_name in indicators:
        with open(os.path.join(output_dir, "KolmogorovSmirnov-{}.tex".format(indicator_name)), "w") as latex:
            latex.write(
                __kolmogorov_smirnov_to_latex(
                    indicator_name,
                    test_data,
                    caption=caption.format(indicator_name),
                    label="table:{}".format(indicator_name),
                )
            )
# Preamble lines needed by the tables that colour their best cells
_COLOR_PREAMBLE = (
    "\\usepackage{colortbl}\n",
    "\\usepackage[table*]{xcolor}\n",
    "\\xdefinecolor{gray95}{gray}{0.65}\n",
    "\\xdefinecolor{gray25}{gray}{0.8}\n",
)

# Preamble lines needed by the table that prints triangle symbols
_MATH_PREAMBLE = (
    "\\usepackage{amssymb}\n",
    "\\usepackage{amsmath}\n",
)


def _bold_labels(columns) -> list:
    """Return the column names as bold LaTeX labels with underscores escaped."""
    return ["\\textbf{{{0}}}".format(name.replace("_", "\\_")) for name in columns]


def _begin_latex_table(output, extra_preamble, title, caption, label, font_size, col_format, column_labels):
    """Write the document header, the table opening and the column header row."""
    output.write("\\documentclass{article}\n")
    output.write("\\usepackage[utf8]{inputenc}\n")
    output.write("\\usepackage{tabularx}\n")
    for line in extra_preamble:
        output.write(line)
    output.write("\\title{%s}\n" % title)
    output.write("\\author{}\n")
    output.write("\\begin{document}\n")
    output.write("\\maketitle\n")
    output.write("\\section{Table}\n")
    output.write("\\begin{table}[!htp]\n")
    output.write(" \\caption{{{}}}\n".format(caption))
    output.write(" \\label{{{}}}\n".format(label))
    output.write(" \\centering\n")
    output.write(" \\begin{%s}\n" % font_size)
    output.write(" \\begin{tabular}{%s}\n" % col_format)
    output.write(" & {} \\\\\\hline\n".format(" & ".join(column_labels)))


def _end_latex_table(output, font_size):
    """Close the tabular, the font-size environment, the table and the document."""
    output.write(" \\end{tabular}\n")
    output.write(" \\end{%s}\n" % font_size)
    output.write("\\end{table}\n")
    output.write("\\end{document}")


def _spaced_exponent(value) -> str:
    """Wrap the sign of a scientific-notation exponent in a fixed-width makebox."""
    return str(value).replace("e-", "e\\makebox[0.1cm]{-}").replace("e+", "e\\makebox[0.1cm]{+}")


def __averages_to_latex(
    central_tendency: pd.DataFrame,
    dispersion: pd.DataFrame,
    caption: str,
    label: str,
    minimization=True,
    alignment: str = "c",
):
    """Convert a pandas DataFrame to a LaTeX tabular. Prints labels in bold and does use math mode.

    Every cell shows ``central_dispersion`` in scientific notation. In each row
    the best value gets the darker background (``gray95``) and the second best the
    lighter one (``gray25``).

    :param central_tendency: Mean/median values; rows are problems, columns are algorithms.
    :param dispersion: Std/IQR values, same layout as ``central_tendency``.
    :param caption: LaTeX table caption.
    :param label: LaTeX table label.
    :param minimization: If True the lowest mean/median is the best one; otherwise the highest.
    :param alignment: Column alignment used for every column of the tabular.
    :return: The complete LaTeX document as a string.
    """
    num_columns, num_rows = central_tendency.shape[1], central_tendency.shape[0]
    output = io.StringIO()

    col_format = "{}|{}".format(alignment, alignment * num_columns)
    _begin_latex_table(
        output,
        _COLOR_PREAMBLE,
        "Median and IQR",
        caption,
        label,
        "scriptsize",
        col_format,
        _bold_labels(central_tendency.columns),
    )

    # Write data lines
    for i in range(num_rows):
        central_values = list(central_tendency.iloc[i])
        dispersion_values = list(dispersion.iloc[i])

        # Rank the columns from best to worst. Equal mean/median values are ordered
        # by Std/IQR, the lower the better, for minimization and maximization alike.
        ranking = sorted(
            range(len(central_values)),
            key=lambda k: (central_values[k] if minimization else -central_values[k], dispersion_values[k]),
        )

        # Compose cell
        values = [
            "{:.2e}_{{{:.2e}}}".format(central, spread)
            for central, spread in zip(central_values, dispersion_values)
        ]

        # Highlight values (a single-column table only has a best value)
        if len(ranking) > 1:
            values[ranking[1]] = "\\cellcolor{gray25} " + values[ranking[1]]
        if ranking:
            values[ranking[0]] = "\\cellcolor{gray95} " + values[ranking[0]]

        output.write(
            " \\textbf{{{0}}} & ${1}$ \\\\\n".format(
                central_tendency.index[i], " $ & $ ".join([str(val) for val in values])
            )
        )

    # Write footer
    _end_latex_table(output, "scriptsize")

    return output.getvalue()


def __wilcoxon_to_latex(df: pd.DataFrame, caption: str, label: str, minimization=True, alignment: str = "c"):
    """Convert a pandas DataFrame to a LaTeX tabular. Prints labels in bold and does use math mode.

    Each cell is a string made of ``+``, ``o`` and ``-`` characters, which are
    rendered as a filled triangle, an empty triangle and a dash respectively.

    :param df: Pandas dataframe.
    :param caption: LaTeX table caption.
    :param label: LaTeX table label.
    :param minimization: When False the two triangle symbols are swapped.
    :param alignment: Column alignment used for every column of the tabular.
    :return: The complete LaTeX document as a string.
    """
    num_columns, num_rows = df.shape[1], df.shape[0]
    output = io.StringIO()

    col_format = "{}|{}".format(alignment, alignment * num_columns)
    _begin_latex_table(
        output,
        _MATH_PREAMBLE,
        "Wilcoxon - Mann-Whitney rank sum test",
        caption,
        label,
        "scriptsize",
        col_format,
        _bold_labels(df.columns),
    )

    symbolo = "\\triangledown\\ "
    symbolplus = "\\blacktriangle\\ "

    if not minimization:
        symbolo, symbolplus = symbolplus, symbolo

    # Write data lines
    for i in range(num_rows):
        # The order of the replacements matters: the dash is expanded first so the
        # text it introduces is never touched by the later substitutions
        values = [
            val.replace("-", "\\text{--}\\ ").replace("o", symbolo).replace("+", symbolplus) for val in df.iloc[i]
        ]
        output.write(
            " \\textbf{{{0}}} & ${1}$ \\\\\n".format(df.index[i], " $ & $ ".join([str(val) for val in values]))
        )

    # Write footer
    _end_latex_table(output, "scriptsize")

    return output.getvalue()


def __median_wilcoxon_to_latex(indicator_name: str, wilcoxon_data: pd.DataFrame, caption: str, label):
    """Build the LaTeX document with medians, IQRs and Wilcoxon symbols of one indicator.

    Rows are problems and columns are algorithms; the last algorithm is the
    reference one. Cells read ``median(IQR)`` followed, for every algorithm but the
    reference, by ``+``, ``-`` or an approx sign taken from the ``TestResult``
    column. The best and second best cells of each row are shaded, and a final row
    counts the ``+``/approx/``-`` outcomes per algorithm.

    :param indicator_name: Indicator whose rows are selected from ``wilcoxon_data``.
    :param wilcoxon_data: Frame with the columns ``Indicator``, ``Algorithm``,
        ``Problem``, ``Median``, ``IQR`` and ``TestResult``.
    :param caption: LaTeX table caption.
    :param label: LaTeX table label.
    :return: The complete LaTeX document as a string.
    """
    indicator_data = wilcoxon_data[wilcoxon_data["Indicator"] == indicator_name]

    problems = pd.unique(indicator_data["Problem"])
    algorithms = pd.unique(indicator_data["Algorithm"])
    minimization = check_minimization(indicator_name)

    alignment = "c"
    col_format = "{}|{}".format(alignment, alignment * len(algorithms))

    output = io.StringIO()
    _begin_latex_table(
        output,
        _COLOR_PREAMBLE,
        "Median and Wilcoxon",
        caption,
        label,
        "tiny",
        col_format,
        _bold_labels(algorithms),
    )

    # Counts the number of times that an algorithm performs better, worse or equal than the reference algorithm
    counters = {algorithm: [0, 0, 0] for algorithm in algorithms}  # best, equal, worse

    for problem in problems:
        problem_data = indicator_data[indicator_data["Problem"] == problem]
        values = []

        for algorithm in algorithms:
            row = problem_data[problem_data["Algorithm"] == algorithm]
            value = "{:.2e}({:.2e})".format(row["Median"].tolist()[0], row["IQR"].tolist()[0])

            # Include the symbol according to the Wilcoxon rank sum test with the reference algorithm
            if algorithm != algorithms[-1]:
                outcome = row["TestResult"].tolist()[0]
                if outcome == "-":
                    value = "{{{}-}}".format(value)
                    counters[algorithm][2] += 1
                elif outcome == "+":
                    value = "{{{}+}}".format(value)
                    counters[algorithm][0] += 1
                else:
                    value = "{{{}\\approx}}".format(value)
                    counters[algorithm][1] += 1

            values.append(value)

        # Find the best and second best values; ties on the median are broken by
        # the IQR, the lower the better
        medians = problem_data["Median"].tolist()
        iqrs = problem_data["IQR"].tolist()
        ranking = sorted(
            range(len(medians)),
            key=lambda k: (medians[k] if minimization else -medians[k], iqrs[k]),
        )

        if ranking:
            values[ranking[0]] = "\\cellcolor{gray95} " + values[ranking[0]]
        if len(ranking) > 1:
            values[ranking[1]] = "\\cellcolor{gray25} " + values[ranking[1]]

        output.write(
            "\\textbf{{{0}}} & ${1}$ \\\\\n".format(problem, " $ & $ ".join([_spaced_exponent(val) for val in values]))
        )

    # Select all but the last counter (the reference algorithm is not compared with itself)
    counter_summary = [counters[algorithm] for algorithm in algorithms[:-1]]

    output.write(" \\hline\n")
    output.write(
        "\\textbf{{{0}}} & ${1}$ \\\\\n".format(
            "$+/\\approx/-$",
            " $ & $ ".join([str(val[0]) + "/" + str(val[1]) + "/" + str(val[2]) for val in counter_summary]),
        )
    )

    # Write footer
    _end_latex_table(output, "tiny")

    return output.getvalue()


def __kolmogorov_smirnov_to_latex(indicator_name: str, test_data: pd.DataFrame, caption: str, label: str):
    """Build the LaTeX document with the Kolmogorov-Smirnov p-values of one indicator.

    Rows are problems and columns are algorithms. Every algorithm but the last one
    shows its ``PValue``, shaded when it is below 0.05; the last column shows a dash.

    :param indicator_name: Indicator whose rows are selected from ``test_data``.
    :param test_data: Frame with the columns ``Indicator``, ``Algorithm``,
        ``Problem`` and ``PValue``.
    :param caption: LaTeX table caption.
    :param label: LaTeX table label.
    :return: The complete LaTeX document as a string.
    """
    indicator_data = test_data[test_data["Indicator"] == indicator_name]

    problems = pd.unique(indicator_data["Problem"])
    algorithms = pd.unique(indicator_data["Algorithm"])

    alignment = "c"
    col_format = "{}|{}".format(alignment, alignment * len(algorithms))

    output = io.StringIO()
    _begin_latex_table(
        output,
        _COLOR_PREAMBLE,
        "Kolmogorov-Smirnov Test",
        caption,
        label,
        "tiny",
        col_format,
        _bold_labels(algorithms),
    )

    for problem in problems:
        problem_data = indicator_data[indicator_data["Problem"] == problem]
        values = []

        for algorithm in algorithms[:-1]:
            row = problem_data[problem_data["Algorithm"] == algorithm]
            p_value = row["PValue"].tolist()[0]
            value = "{:.2e}".format(p_value)

            if p_value < 0.05:
                value = "\\cellcolor{gray25} " + value

            values.append(value)

        # Placeholder for the reference algorithm column
        values.append("-")

        output.write(
            "\\textbf{{{0}}} & ${1}$ \\\\\n".format(problem, " $ & $ ".join([_spaced_exponent(val) for val in values]))
        )

    output.write(" \\hline\n")

    # Write footer
    _end_latex_table(output, "tiny")

    return output.getvalue()


def check_minimization(indicator) -> bool:
    """Tell whether lower values of the indicator are better.

    :param indicator: Short name of the indicator.
    :return: False for ``"HV"``; True for any other name.
    """
    return indicator != "HV"