| Title: | Tools for Distance Metrics |
|---|---|
| Description: | Provides tools for constructing, manipulating and using distance metrics. |
| Authors: | Fredrik Savje [aut, cre] |
| Maintainer: | Fredrik Savje <[email protected]> |
| License: | GPL (>= 3) |
| Version: | 0.1.13 |
| Built: | 2026-05-23 08:03:30 UTC |
| Source: | https://github.com/fsavje/distances |
distance_columns extracts columns from the distance matrix.
distance_columns(distances, column_indices, row_indices = NULL)
distances |
A distances object. |
column_indices |
An integer vector with point indices indicating which columns to be extracted. |
row_indices |
If |
If the complete distance matrix is desired, distance_matrix is
faster than distance_columns.
Returns a matrix with the requested columns.
distance_matrix makes distance matrices (complete and partial) from
distances objects.
distance_matrix(distances, indices = NULL)
distances |
A distances object. |
indices |
If |
Returns a distance matrix of class dist.
distances constructs a distance metric for a set of points. Currently,
it only creates Euclidean distances. It can, however, create distances in any
linear projection of Euclidean space. In other words, Mahalanobis
distances or normalized Euclidean distances are both possible. It is also possible
to give each dimension of the space different weights.
distances(data, id_variable = NULL, dist_variables = NULL, normalize = NULL, weights = NULL)
data |
a matrix or data frame containing the data points between which distances should be derived. |
id_variable |
optional IDs of the data points.
If |
dist_variables |
optional names of the columns in |
normalize |
optional normalization of the data prior to distance construction. If |
weights |
optional weighting of the data prior to distance construction. If |
Let $x$ and $y$ be two data points in data described by two vectors. distances
uses the following metric to derive the distance between $x$ and $y$:
$$\sqrt{(x - y)^{\top} N^{-0.5} \, W \, (N^{-0.5})^{\top} (x - y)}$$
where $N^{-0.5}$ is the Cholesky decomposition (lower triangular) of the inverse of the
matrix specified by normalize, and $W$ is the matrix specified by weights.
When normalize is var(data) (i.e., using the "mahalanobize" option), the function gives
(weighted) Mahalanobis distances. When normalize is diag(var(data)) (i.e., using
the "studentize" option), the function divides each column by its standard deviation, leading to (weighted) normalized
Euclidean distances. If normalize is the identity matrix (i.e., using the "none" or NULL option), the function
derives ordinary Euclidean distances.
Returns a distances object.
# Examples for distances(): build distance objects under several
# normalization/weighting settings and compare them with base R equivalents.

my_data_points <- data.frame(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
                             y = c(10, 9, 8, 7, 6, 6, 7, 8, 9, 10))

# Euclidean distances
my_distances1 <- distances(my_data_points)

# Euclidean distances in only one dimension
my_distances2 <- distances(my_data_points,
                           dist_variables = "x")

# Mahalanobis distances
my_distances3 <- distances(my_data_points,
                           normalize = "mahalanobize")

# Custom normalization matrix
my_norm_mat <- matrix(c(3, 1, 1, 3), nrow = 2)
my_distances4 <- distances(my_data_points,
                           normalize = my_norm_mat)

# Give "x" twice the weight compared to "y"
my_distances5 <- distances(my_data_points,
                           weights = c(2, 1))

# Use normalization and weighting
my_distances6 <- distances(my_data_points,
                           normalize = "mahalanobize",
                           weights = c(2, 1))

# Custom ID labels
my_data_points_withID <- data.frame(my_data_points,
                                    my_ids = letters[1:10])
my_distances7 <- distances(my_data_points_withID,
                           id_variable = "my_ids")


# Compare to standard R functions

all.equal(as.matrix(my_distances1), as.matrix(dist(my_data_points)))
# > TRUE

all.equal(as.matrix(my_distances2), as.matrix(dist(my_data_points[, "x"])))
# > TRUE

# Row 1 of the Mahalanobis distance matrix equals the distances from point 1.
tmp_distances <- sqrt(mahalanobis(as.matrix(my_data_points),
                                  unlist(my_data_points[1, ]),
                                  var(my_data_points)))
names(tmp_distances) <- 1:10
all.equal(as.matrix(my_distances3)[1, ], tmp_distances)
# > TRUE

# A weight of 2 on "x" corresponds to scaling that column by sqrt(2).
tmp_data_points <- as.matrix(my_data_points)
tmp_data_points[, 1] <- sqrt(2) * tmp_data_points[, 1]
all.equal(as.matrix(my_distances5), as.matrix(dist(tmp_data_points)))
# > TRUE

# The covariance matrix is taken from the unscaled data, before weighting.
tmp_data_points <- as.matrix(my_data_points)
tmp_cov_mat <- var(tmp_data_points)
tmp_data_points[, 1] <- sqrt(2) * tmp_data_points[, 1]
tmp_distances <- sqrt(mahalanobis(tmp_data_points,
                                  tmp_data_points[1, ],
                                  tmp_cov_mat))
names(tmp_distances) <- 1:10
all.equal(as.matrix(my_distances6)[1, ], tmp_distances)
# > TRUE

tmp_distances <- as.matrix(dist(my_data_points))
colnames(tmp_distances) <- rownames(tmp_distances) <- letters[1:10]
all.equal(as.matrix(my_distances7), tmp_distances)
# > TRUE
distances object
is.distances checks whether the provided object
is a valid instance of the distances class.
is.distances(x)
x |
object to check. |
Returns TRUE if x is a valid
distances object, otherwise FALSE.
max_distance_search searches for the data point furthest from a set of
query points.
max_distance_search(distances, query_indices = NULL, search_indices = NULL)
distances |
A distances object. |
query_indices |
An integer vector with point indices to query. If |
search_indices |
An integer vector with point indices to search among. If |
An integer vector with point indices for the data point furthest from each query.
nearest_neighbor_search searches for the k nearest neighbors of a set of
query points.
nearest_neighbor_search(distances, k, query_indices = NULL, search_indices = NULL, radius = NULL)
distances |
A distances object. |
k |
The number of neighbors to search for. |
query_indices |
An integer vector with point indices to query. If |
search_indices |
An integer vector with point indices to search among. If |
radius |
Restrict the search to a fixed radius around each query. If fewer than |
A matrix with point indices for the nearest neighbors. Columns in this matrix indicate queries, and rows are ordered by distances from the query.