-
Notifications
You must be signed in to change notification settings - Fork 25
Expand file tree
/
Copy pathmetrics.py
More file actions
169 lines (124 loc) · 5.19 KB
/
metrics.py
File metadata and controls
169 lines (124 loc) · 5.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
"""
Utilities for computing metrics.
This module provides functions to calculate various distance metrics. A metric,
or distance function, is a function that maps two points to a double value,
representing the "distance" between them. For a function to qualify as a valid
metric, it must satisfy the following properties:
1. Symmetry: The distance between two points is the same regardless of the
order, i.e.:
:math:`d(x, y) = d(y, x)` for all :math:`x` and :math:`y`.
2. Positivity: The distance between two distinct points is always positive,
i.e.:
:math:`d(x, y) > 0` for all distinct :math:`x` and :math:`y`, and
:math:`d(x, x) = 0` for every :math:`x`.
3. Triangle inequality: The distance between two points is less than or equal
to the sum of the distances from a third point, i.e.:
:math:`d(x, z) \\leq d(x, y) + d(y, z)` for all points :math:`x, y, z`.
Supported distance metrics include:
- *Euclidean*: The square root of the sum of squared differences between the
components of vectors.
- *Manhattan*: The sum of the absolute differences between the components of
vectors.
- *Minkowski*: A generalization of the Manhattan, Euclidean, and Chebyshev
distances, parameterized by an order `p`.
- *Chebyshev*: The maximum absolute difference between the components of vectors.
- *Cosine*: A distance on unit vectors based on cosine similarity.
"""
from typing import Any, Literal, Union, get_args
import numpy as np
import tdamapper.utils._metrics as _metrics
from tdamapper.protocols import Metric
# Closed set of metric names that can be passed as a string to `get_metric`.
# `get_supported_metrics` derives its result from these literal arguments, so
# adding a name here also requires a matching branch in `get_metric`.
MetricLiteral = Literal[
    "euclidean",
    "manhattan",
    "minkowski",
    "chebyshev",
    "cosine",
]
def get_supported_metrics() -> list[MetricLiteral]:
    """
    List the names of the metrics that can be requested by string.

    The names are read from the arguments of :data:`MetricLiteral`, in the
    order in which they are declared there. A new list is built on every call.

    :return: A list of supported metric names.
    """
    names = get_args(MetricLiteral)
    return [*names]
def euclidean() -> Metric[Any]:
    """
    Return the Euclidean distance function for vectors.

    The Euclidean distance between two vectors is the square root of the sum
    of the squared differences of their components. The returned callable is
    the ``euclidean`` function of the internal ``_metrics`` module.

    :return: The Euclidean distance function.
    """
    distance: Metric[Any] = _metrics.euclidean
    return distance
def manhattan() -> Metric[Any]:
    """
    Return the Manhattan distance function for vectors.

    The Manhattan distance between two vectors is the sum of the absolute
    differences of their components. The returned callable is the
    ``manhattan`` function of the internal ``_metrics`` module.

    :return: The Manhattan distance function.
    """
    distance: Metric[Any] = _metrics.manhattan
    return distance
def chebyshev() -> Metric[Any]:
    """
    Return the Chebyshev distance function for vectors.

    The Chebyshev distance between two vectors is the largest absolute
    difference between their components. The returned callable is the
    ``chebyshev`` function of the internal ``_metrics`` module.

    :return: The Chebyshev distance function.
    """
    distance: Metric[Any] = _metrics.chebyshev
    return distance
def minkowski(p: Union[int, float]) -> Metric[Any]:
    """
    Return the Minkowski distance function of order p for vectors.

    The Minkowski distance generalizes the Manhattan, Euclidean and Chebyshev
    distances. Three orders are delegated to the dedicated functions of this
    module instead of the generic implementation:

    - p = 1 returns the Manhattan distance,
    - p = 2 returns the Euclidean distance,
    - an infinite p (as reported by ``numpy.isinf``) returns the Chebyshev
      distance.

    For any other order, the returned function forwards ``p`` and its two
    arguments to the ``minkowski`` function of the internal ``_metrics``
    module.

    :param p: The order of the Minkowski distance.
    :return: The Minkowski distance function.
    """
    # Pick the dedicated factory, if any; the checks run in the same order as
    # the special cases are listed in the docstring.
    if p == 1:
        special_case = manhattan
    elif p == 2:
        special_case = euclidean
    elif np.isinf(p):
        special_case = chebyshev
    else:
        special_case = None

    if special_case is not None:
        return special_case()

    def dist(x: Any, y: Any) -> float:
        # `p` is captured from the enclosing call.
        return _metrics.minkowski(p, x, y)

    return dist
def cosine() -> Metric[Any]:
    """
    Return the cosine distance function for vectors.

    The cosine similarity :math:`s` of two vectors lies between -1.0 and 1.0:

    - 1.0 means the vectors point in the same direction.
    - 0.0 means the vectors are orthogonal (perpendicular).
    - -1.0 means the vectors point in opposite directions.

    The cosine distance is obtained from the similarity as
    :math:`d(x, y) = \\sqrt{2 \\cdot (1 - s(x, y))}`, a definition chosen so
    that the triangle inequality holds on unit vectors. The returned callable
    is the ``cosine`` function of the internal ``_metrics`` module.

    :return: The cosine distance function.
    """
    distance: Metric[Any] = _metrics.cosine
    return distance
def get_metric(metric: Union[MetricLiteral, Metric[Any]], **kwargs: Any) -> Metric[Any]:
    """
    Resolve a metric specification into a distance function.

    :param metric: Either a callable, which is returned unchanged, or one of
        the metric names returned by
        :func:`tdamapper.utils.metrics.get_supported_metrics`.
    :param kwargs: Keyword arguments forwarded as-is to the factory of the
        selected named metric (e.g., 'p' for Minkowski distance). They are
        ignored when `metric` is a callable.
    :return: The selected distance metric function.
    :raises ValueError: If `metric` is neither a callable nor a known name.
    """
    # A user-supplied distance function is used directly.
    if callable(metric):
        return metric

    # Ordered (name, factory) pairs. They are scanned with `==` rather than
    # looked up in a dict, so an unhashable `metric` still ends in the
    # ValueError below instead of a TypeError.
    factories = (
        ("euclidean", euclidean),
        ("manhattan", manhattan),
        ("minkowski", minkowski),
        ("chebyshev", chebyshev),
        ("cosine", cosine),
    )
    for name, factory in factories:
        if metric == name:
            return factory(**kwargs)

    raise ValueError("metric must be a known string or callable")