Sample given points stochastically in a 3D space with minimum nearest-neighbor distance and maximum density Sample given points stochastically in a 3D space with minimum nearest-neighbor distance and maximum density numpy numpy

Sample given points stochastically in a 3D space with minimum nearest-neighbor distance and maximum density


There's probably an efficient bicriteria approximation scheme, but why bother when integer programming is so quick on average?

import numpy as npn = 300points = np.random.uniform(0, 10, size=(n, 3))from ortools.linear_solver import pywraplpsolver = pywraplp.Solver.CreateSolver("SCIP")variables = [solver.BoolVar("x[{}]".format(i)) for i in range(n)]solver.Maximize(sum(variables))for j, q in enumerate(points):    for i, p in enumerate(points[:j]):        if np.linalg.norm(p - q) <= 1:            solver.Add(variables[i] + variables[j] <= 1)solver.EnableOutput()solver.Solve()print(len([i for (i, variable) in enumerate(variables) if variable.SolutionValue()]))


This isn't optimally large of a subset, but should be close while not taking very long, using KDTree to optimize the distance calculations:

from scipy.spatial import KDTreeimport numpy as npdef space_sample(n = 300, low = 0, high = 10, dist = 1):    points = np.random.uniform(low, high, size=(n, 3))    k = KDTree(points)    pairs = np.array(list(k.query_pairs(dist)))        def reduce_pairs(pairs, remove = []):  #iteratively remove the most connected node        p = pairs[~np.isin(pairs, remove).any(1)]        if p.size >0:            count = np.bincount(p.flatten(), minlength = n)            r = remove + [count.argmax()]            return reduce_pairs(p, r)        else:            return remove        return np.array([p for i, p in enumerate(points) if not(i in reduce_pairs(pairs))])subset = space_sample()

Iteratively removing the most connected node is not optimal (keeps about 200 of the 300 points), but is likely the fastest algorithm that's close to optimal (the only thing faster being just removing at random). You could possibly @njit reduce_pairs to make that part faster (will try if I have time later).


Testing @David Eisenstat's answer with 30 given points:

from ortools.linear_solver import pywraplpimport numpy as npdef subset_David_Eisenstat(points, r):    solver = pywraplp.Solver.CreateSolver("SCIP")    variables = [solver.BoolVar("x[{}]".format(i)) for i in range(len(points))]    solver.Maximize(sum(variables))    for j, q in enumerate(points):        for i, p in enumerate(points[:j]):            if np.linalg.norm(p - q) <= r:                solver.Add(variables[i] + variables[j] <= 1)    solver.EnableOutput()    solver.Solve()    indices = [i for (i, variable) in enumerate(variables) if variable.SolutionValue()]    return points[indices]points = np.array([[ 7.32837882, 12.12765786, 15.01412241], [ 8.25164031, 11.14830379, 15.01412241], [ 8.21790113, 13.05647987, 13.05647987], [ 7.30031002, 13.08276009, 14.05452502], [ 9.18056467, 12.0800735 , 13.05183844], [ 9.17929647, 11.11270337, 14.03027534], [ 7.64737905, 11.48906945, 15.34274827], [ 7.01315886, 12.77870699, 14.70301668], [ 8.88132414, 10.81243313, 14.68685022], [ 7.60617372, 13.39792166, 13.39792166], [ 8.85967682, 12.72946394, 12.72946394], [ 9.50060705, 11.43361294, 13.37866092], [ 8.21790113, 12.08471494, 14.02824481], [ 7.32837882, 12.12765786, 16.98587759], [ 8.25164031, 11.14830379, 16.98587759], [ 7.30031002, 13.08276009, 17.94547498], [ 8.21790113, 13.05647987, 18.94352013], [ 9.17929647, 11.11270337, 17.96972466], [ 9.18056467, 12.0800735 , 18.94816156], [ 7.64737905, 11.48906945, 16.65725173], [ 7.01315886, 12.77870699, 17.29698332], [ 8.88132414, 10.81243313, 17.31314978], [ 7.60617372, 13.39792166, 18.60207834], [ 8.85967682, 12.72946394, 19.27053606], [ 9.50060705, 11.43361294, 18.62133908], [ 8.21790113, 12.08471494, 17.97175519], [ 7.32837882, 15.01412241, 12.12765786], [ 8.25164031, 15.01412241, 11.14830379], [ 7.30031002, 14.05452502, 13.08276009], [ 9.18056467, 13.05183844, 12.0800735 ],])

When the expected minimum distance is 1:

subset1 = subset_David_Eisenstat(points, r=1.)print(len(subset1))# Output: 18

Check the minimum distance:

from scipy.spatial.distance import cdistdist = cdist(subset1, subset1, metric='euclidean')# Delete diagonalres = dist[~np.eye(dist.shape[0],dtype=bool)].reshape(dist.shape[0],-1)print(np.min(res))# Output: 1.3285513450926985

Change the expected minimum distance to 2:

subset2 = subset_David_Eisenstat(points, r=2.)print(len(subset2))# Output: 10

Check the minimum distance:

from scipy.spatial.distance import cdistdist = cdist(subset2, subset2, metric='euclidean')# Delete diagonalres = dist[~np.eye(dist.shape[0],dtype=bool)].reshape(dist.shape[0],-1)print(np.min(res))# Output: 2.0612041004376223