Source code for sgpykit.src.lookup_merge_and_diff

import logging
import numpy as np

from sgpykit.src.detect_insufficient_tolerance import detect_insufficient_tolerance
from sgpykit.src.mysortrows import mysortrows

logger = logging.getLogger(__name__)


[docs] def lookup_merge_and_diff(pts_list, pts_list_old, tol=1e-14): """ Identify points to compute, recycle, and discard between two sparse grids. This function compares two sets of points (from new and old sparse grids) and categorizes them based on their presence in both grids, using a specified tolerance to determine equality. It is used in adaptive sparse grid algorithms to efficiently manage point evaluations. Parameters ---------- pts_list : numpy.ndarray Array of points from the new sparse grid (shape: N x d, where N is the number of points and d is the dimensionality). pts_list_old : numpy.ndarray Array of points from the old sparse grid (shape: N_old x d). tol : float, optional Tolerance for considering two points equal (default is 1e-14). Returns ------- tocomp_list : numpy.ndarray Indices of points in pts_list that need to be evaluated (not present in old grid). recycle_list : numpy.ndarray Indices of points in pts_list that can be recycled from old grid. recycle_list_old : numpy.ndarray Corresponding indices in pts_list_old for recycled points. discard_list : numpy.ndarray Indices of points in pts_list_old that are no longer in the new grid. Notes ----- - The function uses lexicographic sorting with tolerance to identify matching points. - For non-nested grids, some points from the old grid may be discarded. - The function performs several sanity checks to ensure correctness. """ # [tocomp_list,recycle_list,recycle_list_old,discard_list] = lookup_merge_and_diff(pts_list,pts_list_old,tol) # looks for points of pts_list in pts_list_old using the same algorithm as reduce_sparse_grid. tol is # the tolerance for two points to be considered equal. discard_list is the list of points that no longer belong to the grid # (may happen for non-nested grids) N = pts_list.shape[0] N_old = pts_list_old.shape[0] # the list of indices of points to be evaluated. Init to its max length, will be cut after the search is over tocomp_list = np.zeros(N, dtype=np.int64) # if grid are not nested, not all of the old grid will be recycled. Thus, # we also need 2 list of indices of points to be recycled, storing the # positions in the old grid and in the new one recycle_list_old = np.zeros(N_old, dtype=np.int64) recycle_list = np.zeros(N, dtype=np.int64) discard_list = np.zeros(N_old, dtype=np.int64) # first, merge the two lists Merged = np.vstack((pts_list_old, pts_list)) # and a safety check: are we using a sufficiently fine tol when detecting identical points? detect_insufficient_tolerance(Merged, tol) # next, let's order the rows of Merged in lexicographic order, obtaining Sorted. If I use mysortrows then two rows like # [a b c d] # [a-t b c+t d] # are considered equal (if t < tol ) and therefore placed one after the other # sorter is an index vector that maps Merged into Sorted, i.e. Merged[sorter,:]==Sorted Sorted, sorter = mysortrows(Merged, tol) ## same fix as reduce_sparse_grid mysortrows usage # I also need to remember which points come from pts_list and which from # pts_list_old, and from which original position. # Thus I create a flag vector [-1 -2 ... -N_old 1 2 .. N], i.e. positive # flags identify the new grid and negative ones the old grid, then I sort # it according to sorter flags = np.concatenate((-np.arange(1, N_old+1), np.arange(1, N+1))) flags_sorted = flags[sorter] # next I take the difference of two consecutive rows. if the difference is small, then the rows are the same, i.e. the knot is the same dSorted = np.diff(Sorted, axis=0) # I measure the difference with infty norm instead of L2 norm: # i take the maximum component of each row (2 means "operate on columns"): # max(abs(dSorted),[],2) # then I want to see which ones have this max bigger than tol # diff_eq=max(abs(dSorted),[],2)>tol # this command returns a vector of True and false # diff_eq=[1 1 0 1 1] # this means that the 2nd point is different from the 1st, the 3rd from the 2nd, but # the 4th is equal to the 3rd ( diff(3)=v(4)-v(3) ), hence in common between the # grids. diff_eq = np.any(np.abs(dSorted) > tol, axis=1) #diff_eq = np.amax(np.abs(dSorted), [], 2) > Tol # now I scroll diff_eq and sort out everything according to these rules: # --> if diff_eq(k)==0, # in this case either Sorted(k+1) is in the new grid and Sorted(k) # is in the old grid and both are equal or viceversa. # Therefore, the point in the new grid goes into recycle_list and the # old one in recycle_list_old. # Then I can skip the following (since it's equal and I have sorted it # already) # --> else, diff_eq(k)==1. # In this case, 4 cases are possible but actually only two matters # -----> both Sorted(k) and Sorted(k+1) comes from the old_grid. # then, Sorted(k) is to discard # -----> both Sorted(k) and Sorted(k+1) comes from the new_grid. # then, Sorted(k) is to compute # -----> Sorted(k) is new, Sorted(k+1) is old. # then, Sorted(k) is to compute # -----> Sorted(k) is old, Sorted(k+1) is new. # then, Sorted(k) is to discard i = 0 # scrolls recycle lists j = 0 # scrolls compute_list k = 0 # scrolls diff_eq L = len(diff_eq) discard = 0 while k < L: if diff_eq[k]: # Two consecutive entries are different if flags_sorted[k] > 0: tocomp_list[j] = flags_sorted[k]-1 # 0-based j += 1 else: discard_list[discard] = -flags_sorted[k]-1 # 0-based discard += 1 # then move to the following k += 1 else: # Two consecutive entries are equal if flags_sorted[k] > 0: recycle_list[i] = flags_sorted[k]-1 # 0-based recycle_list_old[i] = -flags_sorted[k + 1]-1 # 0-based else: recycle_list_old[i] = -flags_sorted[k]-1 recycle_list[i] = flags_sorted[k + 1]-1 i += 1 # then I can skip the k+1 because I have already sorted it k += 2 # need to handle the case k==L, since diff is 1 element shorter than sorted. Note # that # --> the node in Sorted[L,:] has been already taken care of inside the while loop # --> we only need to do something if the diff_eq(L)==1. Indeed, if # diff_eq(L)==0, then Sorted(L+1,:) is already taken care of (and in this # case the final value of k is L+2) if diff_eq[L - 1]: # 0-based indexing if flags_sorted[L] > 0: tocomp_list[j] = flags_sorted[L]-1 # 0-based j += 1 else: discard_list[discard] = -flags_sorted[L-1]-1 # 0-based discard += 1 # show some statistics logger.debug(f'new evaluation needed: {j} recycled evaluations: {i}, discarded evaluations: {discard}') # remove the extra entries of tocomp_list, recycle_lists, discard_list. Pay attention to special cases if j != N: # in this case there are no points to recycle and we have completely filled tocomp_list if tocomp_list[j] != 0: raise RuntimeError('Failed sanity check: tocomp_list[j]~=0') tocomp_list = tocomp_list[:j] if i == N: logger.warning('the two grids are the same!') else: if i > N: raise RuntimeError('Failed sanity check: The code has detected more points to recycle than points in the new sparse grid! ' 'Double check the values of tolerances used to detect identical points (both here and in reduce_sparse_grid) ' 'and rerun the code. i>N') if recycle_list[i] != 0: raise RuntimeError('Failed sanity check: recycle_list[i]~=0') recycle_list = recycle_list[:i] if i != len(recycle_list_old): if recycle_list_old[i] != 0: raise RuntimeError('Failed sanity check: recycle_list_old[i]~=0') recycle_list_old = recycle_list_old[:i] if discard != N_old: if discard_list[discard] != 0: raise RuntimeError('Failed sanity check: discard_list[discard]~=0') discard_list = discard_list[:discard] # safety checks if i + discard != N_old: raise RuntimeError('Failed sanity check: The code has lost track of some points of the old grid, i+discard~=N_old') if len(recycle_list) != len(recycle_list_old): raise RuntimeError('Failed sanity check: mismatch between the two sets of recycling points. length(recycle_list)~=length(recycle_list_old)') if not np.array_equal(np.sort(np.concatenate((tocomp_list, recycle_list))), np.arange(0, N)): raise RuntimeError('Failed sanity check: The code has lost track of some points of the new grid, or some points from the old grid have been mistaken as points of the new grid. Double check the values of tolerances use to detect identical points (both here and in reduce_sparse_grid) and try to rerun the code.') return tocomp_list, recycle_list, recycle_list_old, discard_list