CREATE OR REPLACE FUNCTION gds.vector.distance(list<double> list1, list<double> list2, string metric) RETURNS(float) {

  /*
    First Author: Jue Yuan
    First Commit Date: Nov 27, 2024

    Recent Author: Jue Yuan
    Recent Commit Date: Nov 27, 2024

    Maturity:
      alpha

    Description:
      Calculates the distance between two vectors represented as lists of double values,
      based on a specified distance metric. Intermediate arithmetic is carried out in
      double precision; the final value is narrowed to float on return.

    Parameters:
      list<double> list1:
        The first vector as a list of double values.
      list<double> list2:
        The second vector as a list of double values.
      string metric:
        The distance metric to use. The name is lower-cased before matching, so it is
        case-insensitive. Supported metrics are:
          "cosine": Cosine distance
          "l2": Euclidean distance
          "ip": Inner product (dot product)

    Returns:
      float:
        The computed distance between the two input vectors based on the specified metric.
        For "l2", two empty vectors yield 0.

    Exceptions:
      list_size_mismatch (90000):
        Raised when the input vectors are not of equal size. This check runs before the
        metric name is examined.
      zero_divisor (90001):
        Raised by the "cosine" metric when the magnitude of either vector is below 1e-7
        (i.e. the vector is effectively all zeros), to avoid dividing by zero.
      invalid_metric_type (90002):
        Raised when an unsupported distance metric is provided.

    Logic Overview:
      Input Validation:
        Ensures both vectors have the same size.
      Metric Handling:
        Cosine Distance:
          Calculated as 1 - (inner product of vectors) / (product of magnitudes).
        L2 Distance:
          Computes the square root of the sum of squared differences between corresponding elements.
        Inner Product:
          Directly computes the dot product of the two vectors.

      Error Handling:
        Raises an exception if the provided metric is invalid.

    Use Case:
      This function is essential for machine learning, data science, and information retrieval applications,
      where distance or similarity calculations between vector representations (such as embeddings or feature vectors) are required.
  */

  EXCEPTION list_size_mismatch (90000);
  EXCEPTION zero_divisor(90001);
  EXCEPTION invalid_metric_type (90002);

  // Copy the list parameters into accumulators so that size(), get() and
  // inner_product() can be applied to them.
  ListAccum<double> @@myList1 = list1;
  ListAccum<double> @@myList2 = list2;

  IF (@@myList1.size() != @@myList2.size()) THEN
    RAISE list_size_mismatch ("Two lists provided for gds.vector.distance have different sizes.");
  END;

  // Final value; float to match the declared return type.
  SumAccum<float> @@myResult;
  // Sum of squared differences for the L2 metric. Kept in double so that the
  // accumulation does not lose precision before the square root is taken.
  SumAccum<double> @@sqrSum;

  CASE lower(metric)
    WHEN "cosine" THEN
      double inner_p = inner_product(@@myList1, @@myList2);
      double v1_magn = sqrt(inner_product(@@myList1, @@myList1));
      double v2_magn = sqrt(inner_product(@@myList2, @@myList2));
      // A magnitude is a square root and therefore never negative, so a plain
      // "<" against a small positive epsilon is enough. The epsilon (rather than
      // an exact == 0 test) avoids floating-point comparison errors.
      IF (v1_magn < 0.0000001) THEN
        RAISE zero_divisor ("The elements in the first list are all zero. It will introduce a zero divisor.");
      END;
      IF (v2_magn < 0.0000001) THEN
        RAISE zero_divisor ("The elements in the second list are all zero. It will introduce a zero divisor.");
      END;
      @@myResult = 1 - inner_p / (v1_magn * v2_magn);
    WHEN "l2" THEN
      // Skip the loop for empty vectors instead of relying on how an inverted
      // range [0, -1] is treated; the sum then stays 0 and the distance is 0.
      IF (@@myList1.size() > 0) THEN
        FOREACH i IN RANGE [0, @@myList1.size() - 1] DO
          // Compute each element difference once and square it.
          double diff = @@myList1.get(i) - @@myList2.get(i);
          @@sqrSum += diff * diff;
        END;
      END;
      @@myResult = sqrt(@@sqrSum);
    WHEN "ip" THEN
      @@myResult = inner_product(@@myList1, @@myList2);
    ELSE
      RAISE invalid_metric_type ("Invalid metric algorithm provided, currently supported: cosine, l2 and ip.");
  END;

  RETURN @@myResult;
}
0 commit comments