Source code for jmetal.util.distance

from abc import ABC, abstractmethod
from enum import Enum
from typing import List, Union, Optional

import numpy
from scipy.spatial import distance

"""
.. module:: distance
   :platform: Unix, Windows
   :synopsis: implementation of distances between entities

.. moduleauthor:: Antonio J. Nebro <antonio@lcc.uma.es>
"""


[docs] class DistanceMetric(Enum): """ Enumeration of available distance metrics for optimized distance calculations. Each metric is optimized for specific use cases: - L2_SQUARED: Fastest, avoids sqrt computation for relative distance comparisons - LINF: Efficient for high-dimensional spaces, emphasizes maximum difference - TCHEBY_WEIGHTED: Flexible weighted distance allowing preference specification """ L2_SQUARED = "l2_squared" LINF = "linf" TCHEBY_WEIGHTED = "tcheby_weighted"
[docs] class DistanceCalculator: """ High-performance distance calculator supporting multiple metrics. This utility class provides optimized implementations for different distance metrics commonly used in multi-objective optimization. All methods are static for efficiency and support numpy array operations for vectorized performance. """
[docs] @staticmethod def calculate_distance(point1: numpy.ndarray, point2: numpy.ndarray, metric: DistanceMetric, weights: Optional[numpy.ndarray] = None) -> float: """ Calculate distance between two points using the specified metric. Args: point1: First point as numpy array point2: Second point as numpy array metric: Distance metric to use weights: Optional weights for TCHEBY_WEIGHTED metric Returns: float: Distance between points according to specified metric Raises: ValueError: If metric is invalid or weights are required but not provided """ if metric == DistanceMetric.L2_SQUARED: return DistanceCalculator._l2_squared(point1, point2) elif metric == DistanceMetric.LINF: return DistanceCalculator._linf(point1, point2) elif metric == DistanceMetric.TCHEBY_WEIGHTED: if weights is None: raise ValueError("Weights required for TCHEBY_WEIGHTED metric") return DistanceCalculator._tcheby_weighted(point1, point2, weights) else: raise ValueError(f"Unknown distance metric: {metric}")
@staticmethod def _l2_squared(point1: numpy.ndarray, point2: numpy.ndarray) -> float: """ Calculate squared Euclidean distance (L2 squared). This is faster than regular Euclidean distance as it avoids the sqrt operation. Suitable for relative distance comparisons where absolute values don't matter. Args: point1: First point point2: Second point Returns: float: Squared Euclidean distance """ diff = point1 - point2 return numpy.dot(diff, diff) @staticmethod def _linf(point1: numpy.ndarray, point2: numpy.ndarray) -> float: """ Calculate L-infinity (Chebyshev) distance. This metric emphasizes the maximum difference across all dimensions. Efficient for high-dimensional spaces and robust to outliers. Args: point1: First point point2: Second point Returns: float: L-infinity distance (maximum absolute difference) """ return numpy.max(numpy.abs(point1 - point2)) @staticmethod def _tcheby_weighted(point1: numpy.ndarray, point2: numpy.ndarray, weights: numpy.ndarray) -> float: """ Calculate weighted Chebyshev distance. This metric allows specifying importance weights for different dimensions. Useful when objectives have different scales or importance levels. Args: point1: First point point2: Second point weights: Weight vector for each dimension Returns: float: Weighted Chebyshev distance """ weighted_diff = weights * numpy.abs(point1 - point2) return numpy.max(weighted_diff)
[docs] @staticmethod def calculate_distance_matrix(points: numpy.ndarray, metric: DistanceMetric, weights: Optional[numpy.ndarray] = None) -> numpy.ndarray: """ Calculate pairwise distance matrix between all points using vectorized operations. This is significantly faster than calculating distances individually, especially for large numbers of points. Uses optimized NumPy operations for maximum performance. Args: points: 2D array where each row is a point (n_points, n_dimensions) metric: Distance metric to use weights: Optional weights for TCHEBY_WEIGHTED metric Returns: numpy.ndarray: Symmetric distance matrix (n_points, n_points) Raises: ValueError: If metric is invalid or weights are required but not provided """ n_points = points.shape[0] if metric == DistanceMetric.L2_SQUARED: return DistanceCalculator._l2_squared_matrix(points) elif metric == DistanceMetric.LINF: return DistanceCalculator._linf_matrix(points) elif metric == DistanceMetric.TCHEBY_WEIGHTED: if weights is None: raise ValueError("Weights required for TCHEBY_WEIGHTED metric") return DistanceCalculator._tcheby_weighted_matrix(points, weights) else: raise ValueError(f"Unknown distance metric: {metric}")
@staticmethod def _l2_squared_matrix(points: numpy.ndarray) -> numpy.ndarray: """ Calculate squared Euclidean distance matrix using vectorized operations. Uses broadcasting and matrix operations for optimal performance. Formula: ||a-b||² = ||a||² + ||b||² - 2⟨a,b⟩ Args: points: 2D array of points (n_points, n_dimensions) Returns: numpy.ndarray: Symmetric squared distance matrix """ # Calculate squared norms for each point squared_norms = numpy.sum(points**2, axis=1) # Calculate dot products matrix dot_products = numpy.dot(points, points.T) # Use broadcasting to create distance matrix # ||a-b||² = ||a||² + ||b||² - 2⟨a,b⟩ distance_matrix = (squared_norms[:, numpy.newaxis] + squared_norms[numpy.newaxis, :] - 2 * dot_products) # Ensure diagonal is exactly zero (numerical precision) numpy.fill_diagonal(distance_matrix, 0.0) # Ensure non-negative distances (numerical precision) distance_matrix = numpy.maximum(distance_matrix, 0.0) return distance_matrix @staticmethod def _linf_matrix(points: numpy.ndarray) -> numpy.ndarray: """ Calculate L-infinity distance matrix using vectorized operations. Uses broadcasting for efficient computation of maximum absolute differences. Args: points: 2D array of points (n_points, n_dimensions) Returns: numpy.ndarray: Symmetric L-infinity distance matrix """ n_points = points.shape[0] # Reshape for broadcasting: (n_points, 1, n_dims) - (1, n_points, n_dims) points_expanded1 = points[:, numpy.newaxis, :] points_expanded2 = points[numpy.newaxis, :, :] # Calculate absolute differences and take maximum along last axis distance_matrix = numpy.max(numpy.abs(points_expanded1 - points_expanded2), axis=2) return distance_matrix @staticmethod def _tcheby_weighted_matrix(points: numpy.ndarray, weights: numpy.ndarray) -> numpy.ndarray: """ Calculate weighted Chebyshev distance matrix using vectorized operations. Args: points: 2D array of points (n_points, n_dimensions) weights: Weight vector for each dimension Returns: numpy.ndarray: Symmetric weighted Chebyshev distance matrix """ n_points = points.shape[0] # Reshape for broadcasting points_expanded1 = points[:, numpy.newaxis, :] points_expanded2 = points[numpy.newaxis, :, :] # Apply weights and calculate maximum weighted difference weighted_diff = weights * numpy.abs(points_expanded1 - points_expanded2) distance_matrix = numpy.max(weighted_diff, axis=2) return distance_matrix
[docs] @staticmethod def calculate_min_distances_vectorized(points: numpy.ndarray, selected_indices: List[int], metric: DistanceMetric, weights: Optional[numpy.ndarray] = None) -> numpy.ndarray: """ Calculate minimum distances from each point to the selected points using vectorized operations. This is optimized for the subset selection process where we need to find the minimum distance from each candidate point to any already selected point. Args: points: 2D array of all points (n_points, n_dimensions) selected_indices: List of indices of already selected points metric: Distance metric to use weights: Optional weights for TCHEBY_WEIGHTED metric Returns: numpy.ndarray: Array of minimum distances for each point """ if len(selected_indices) == 0: # If no points selected yet, return infinite distances return numpy.full(points.shape[0], numpy.inf) # Extract selected points selected_points = points[selected_indices] # Calculate minimum distances if metric == DistanceMetric.L2_SQUARED: min_distances = DistanceCalculator._min_l2_squared_distances(points, selected_points) elif metric == DistanceMetric.LINF: min_distances = DistanceCalculator._min_linf_distances(points, selected_points) elif metric == DistanceMetric.TCHEBY_WEIGHTED: if weights is None: raise ValueError("Weights required for TCHEBY_WEIGHTED metric") min_distances = DistanceCalculator._min_tcheby_weighted_distances(points, selected_points, weights) else: raise ValueError(f"Unknown distance metric: {metric}") # Set selected points to have infinite distance (they should not be reselected) for idx in selected_indices: min_distances[idx] = numpy.inf return min_distances
@staticmethod def _min_l2_squared_distances(points: numpy.ndarray, selected_points: numpy.ndarray) -> numpy.ndarray: """Calculate minimum squared L2 distances to selected points.""" # Calculate distances from each point to each selected point # points: (n_points, n_dims), selected_points: (n_selected, n_dims) # Squared norms points_norms = numpy.sum(points**2, axis=1) # (n_points,) selected_norms = numpy.sum(selected_points**2, axis=1) # (n_selected,) # Dot products matrix dot_products = numpy.dot(points, selected_points.T) # (n_points, n_selected) # Distance matrix using broadcasting distances = (points_norms[:, numpy.newaxis] + selected_norms[numpy.newaxis, :] - 2 * dot_products) # Ensure non-negative distances = numpy.maximum(distances, 0.0) # Return minimum distance for each point return numpy.min(distances, axis=1) @staticmethod def _min_linf_distances(points: numpy.ndarray, selected_points: numpy.ndarray) -> numpy.ndarray: """Calculate minimum L-infinity distances to selected points.""" # points: (n_points, n_dims), selected_points: (n_selected, n_dims) # Reshape for broadcasting points_expanded = points[:, numpy.newaxis, :] # (n_points, 1, n_dims) selected_expanded = selected_points[numpy.newaxis, :, :] # (1, n_selected, n_dims) # Calculate L-infinity distances distances = numpy.max(numpy.abs(points_expanded - selected_expanded), axis=2) # (n_points, n_selected) # Return minimum distance for each point return numpy.min(distances, axis=1) @staticmethod def _min_tcheby_weighted_distances(points: numpy.ndarray, selected_points: numpy.ndarray, weights: numpy.ndarray) -> numpy.ndarray: """Calculate minimum weighted Chebyshev distances to selected points.""" # Reshape for broadcasting points_expanded = points[:, numpy.newaxis, :] # (n_points, 1, n_dims) selected_expanded = selected_points[numpy.newaxis, :, :] # (1, n_selected, n_dims) # Apply weights and calculate weighted Chebyshev distances weighted_diff = weights * numpy.abs(points_expanded - selected_expanded) distances = numpy.max(weighted_diff, axis=2) # (n_points, n_selected) # Return minimum distance for each point return numpy.min(distances, axis=1)
class Distance(ABC): @abstractmethod def get_distance(self, element1, element2) -> float: pass
[docs] class EuclideanDistance(Distance): """ Euclidean distance implementation with enhanced type safety and validation. Supports both Python lists and numpy arrays as input. Provides comprehensive input validation for robust operation. """
[docs] def get_distance(self, list1: Union[List[float], numpy.ndarray], list2: Union[List[float], numpy.ndarray]) -> float: """ Calculate the Euclidean distance between two points. Args: list1: First point as list or numpy array list2: Second point as list or numpy array Returns: float: Euclidean distance between the points Raises: ValueError: If inputs have different dimensions or are empty TypeError: If inputs cannot be converted to numeric arrays """ # Convert to numpy arrays for consistent handling try: arr1 = numpy.asarray(list1, dtype=float) arr2 = numpy.asarray(list2, dtype=float) except (ValueError, TypeError) as e: raise TypeError(f"Input vectors must be numeric: {e}") # Input validation - check for empty arrays if arr1.size == 0 or arr2.size == 0: raise ValueError("Input vectors cannot be empty") # Dimension validation if arr1.shape != arr2.shape: raise ValueError(f"Input vectors must have the same dimensions: " f"{arr1.shape} vs {arr2.shape}") # For 1D vectors, use scipy's optimized implementation if arr1.ndim == 1: return distance.euclidean(arr1, arr2) else: # For higher dimensions, flatten and compute return distance.euclidean(arr1.flatten(), arr2.flatten())
[docs] class CosineDistance(Distance): """ Cosine distance implementation with reference point translation. This class computes the cosine distance between two points after translating them relative to a reference point. The cosine distance is defined as: distance = 1 - cosine_similarity Where cosine_similarity = (a·b) / (||a|| * ||b||) The distance ranges from 0 (identical direction) to 2 (opposite directions). Performance optimizations: - Cached reference point norm for efficiency - Vectorized numpy operations - Robust input validation and error handling """ def __init__(self, reference_point: Union[List[float], numpy.ndarray]): """ Initialize the cosine distance calculator with a reference point. Args: reference_point: Point used to translate input vectors before computing distance Raises: ValueError: If reference point is empty TypeError: If reference point is not numeric or is None """ if reference_point is None: raise TypeError("Reference point cannot be None") try: self.reference_point = numpy.asarray(reference_point, dtype=float) except (ValueError, TypeError) as e: raise TypeError(f"Reference point must be numeric: {e}") if self.reference_point.size == 0: raise ValueError("Reference point cannot be empty") # Cache reference point norm for efficiency self._ref_norm = numpy.linalg.norm(self.reference_point)
[docs] def get_distance(self, list1: Union[List[float], numpy.ndarray], list2: Union[List[float], numpy.ndarray]) -> float: """ Calculate the cosine distance between two points relative to the reference point. The computation follows these steps: 1. Translate both points by subtracting the reference point 2. Compute cosine similarity of the translated vectors 3. Return cosine distance = 1 - cosine_similarity Args: list1: First point as list or numpy array list2: Second point as list or numpy array Returns: float: Cosine distance in range [0, 2] 0 = vectors point in same direction 1 = vectors are orthogonal 2 = vectors point in opposite directions Raises: ValueError: If inputs have different dimensions than reference point or are empty TypeError: If inputs cannot be converted to numeric arrays """ # Convert inputs to numpy arrays try: vec1 = numpy.asarray(list1, dtype=float) vec2 = numpy.asarray(list2, dtype=float) except (ValueError, TypeError) as e: raise TypeError(f"Input vectors must be numeric: {e}") # Validate inputs if vec1.size == 0 or vec2.size == 0: raise ValueError("Input vectors cannot be empty") if vec1.shape != vec2.shape: raise ValueError(f"Input vectors must have same dimensions: {vec1.shape} vs {vec2.shape}") if vec1.shape != self.reference_point.shape: raise ValueError(f"Input vectors must match reference point dimensions: " f"{vec1.shape} vs {self.reference_point.shape}") # Translate vectors relative to reference point diff1 = vec1 - self.reference_point diff2 = vec2 - self.reference_point # Handle zero vectors after translation (return 0 since they're at reference point) norm1 = numpy.linalg.norm(diff1) norm2 = numpy.linalg.norm(diff2) if norm1 == 0.0 and norm2 == 0.0: # Both vectors are at the reference point return 0.0 elif norm1 == 0.0 or norm2 == 0.0: # One vector is at reference point, other is not return 1.0 # Compute cosine similarity dot_product = numpy.dot(diff1, diff2) cosine_similarity = dot_product / (norm1 * norm2) # Clamp to handle numerical precision issues cosine_similarity = numpy.clip(cosine_similarity, -1.0, 1.0) # Return cosine distance = 1 - cosine_similarity distance_result = 1.0 - cosine_similarity # Handle numerical precision for identical vectors (should be exactly 0) if numpy.allclose(diff1, diff2, rtol=1e-15, atol=1e-15): return 0.0 return distance_result
[docs] def get_similarity(self, list1: Union[List[float], numpy.ndarray], list2: Union[List[float], numpy.ndarray]) -> float: """ Calculate the cosine similarity between two points relative to the reference point. This is a convenience method that returns the similarity instead of distance. Args: list1: First point as list or numpy array list2: Second point as list or numpy array Returns: float: Cosine similarity in range [-1, 1] 1 = vectors point in same direction 0 = vectors are orthogonal -1 = vectors point in opposite directions """ distance = self.get_distance(list1, list2) return 1.0 - distance