Coverage for jetgp/full_degp/acquisition_funcs.py: 0%
110 statements
« prev ^ index » next coverage.py v7.10.7, created at 2026-03-31 11:46 -0500
1# acquisition_functions.py
3import numpy as np
4from jetgp.full_degp import degp_utils
5import jetgp.utils as utils
6from line_profiler import profile
7from scipy.stats.qmc import Sobol
8from scipy.optimize import minimize
def find_next_point_batch(
    gp,
    params,
    dist_params,
    acquisition_func,
    integration_points=None,
    candidate_points=None,
    n_candidate_points: int = 1024,
    n_local_starts: int = 1,
    n_batch_points: int = 10,
    local_opt = True,
    **acq_kwargs
):
    """
    Greedy batch active learning over a list of GPs (one per time step).

    Each of the ``n_batch_points`` iterations scores a shared candidate set
    with ``acquisition_func`` for every GP, averages the per-GP scores,
    optionally refines the best candidates by local optimization, and then
    "hallucinates" each GP forward by rebuilding it with its own prediction
    at the selected point appended to the training data.

    Args:
        gp: List of GP objects, one per time step (a bare GP is rejected).
        params: List of kernel hyperparameter vectors, parallel to ``gp``.
        dist_params: Input-distribution parameters; used to map Sobol samples
            from the unit cube into the input domain and to build bounds.
        acquisition_func: Callable ``(gp_t, params_t, precomputed_diffs,
            n_integration_points=...) -> scores`` returning one score per
            candidate (e.g. ``imse_reduction`` / ``mse_reduction``).
        integration_points: Integration grid for integrated criteria.
            NOTE(review): despite the ``None`` default, ``.shape`` is read
            unconditionally just below, so passing ``None`` raises — confirm
            whether ``None`` is actually supported.
        candidate_points: Optional fixed candidate set; when ``None`` a fresh
            scrambled-Sobol set is drawn each batch iteration.
        n_candidate_points: Number of Sobol candidates to draw.
        n_local_starts: Number of top-scoring candidates used as starting
            points for local refinement.
        n_batch_points: Number of points to select (batch size).
        local_opt: If True, refine candidates with BFGS / L-BFGS-B;
            otherwise take the argmax candidate and remove it from the pool.
        **acq_kwargs: Extra options; only ``seed`` is read here (Sobol seed).

    Returns:
        np.ndarray of selected points, shape (n_batch_points, d).

    Raises:
        ValueError: If ``gp`` or ``params`` is not a list.
    """
    selected_points = []
    n_integration_points = integration_points.shape[0]

    # Draw candidates from a scrambled Sobol sequence on the unit cube and
    # map them into the input domain via the inverse CDF of dist_params.
    def _generate_candidates(dimension, seed_offset=0):
        sampler = Sobol(d=dimension, scramble=True, seed=acq_kwargs.get('seed', 0) + seed_offset)
        coarse_points_unit = sampler.random(n=n_candidate_points)
        return utils.get_inverse(dist_params, coarse_points_unit)

    if type(gp) is not list or type(params) is not list:
        raise ValueError("gp and params must be provided as list")

    for n in range(n_batch_points):
        if candidate_points is None:
            # seed_offset=n gives a different Sobol draw per batch iteration.
            # NOTE(review): gp[0].n_bases is used as the *input dimension*
            # here — confirm this matches the GP's convention.
            candidate_points = _generate_candidates(gp[0].n_bases,seed_offset=n)
        # Normalize once before the loop
        cand_scores_all = []
        if gp[0].normalize:
            # The first GP's normalization stats are reused for all time steps.
            sigmas_x = gp[0].sigmas_x
            mus_x = gp[0].mus_x
            candidate_points_norm = utils.normalize_x_data_test(candidate_points.copy(), sigmas_x, mus_x)
            if integration_points is not None:
                integration_points_norm = utils.normalize_x_data_test(integration_points.copy(), sigmas_x, mus_x)
                n_integration_points = integration_points.shape[0]
        else:
            candidate_points_norm = candidate_points.copy()
            integration_points_norm = integration_points.copy()
            # NOTE(review): this clobbers the count computed above with None;
            # acquisition funcs that slice with it would then fail — confirm.
            n_integration_points = None

        # Precompute all differences once; they depend only on (normalized)
        # locations, so every GP in the list can share them.
        precomputed_diffs = {
            'train_cand': degp_utils.differences_by_dim_func(
                gp[0].x_train, candidate_points_norm, gp[0].n_order, return_deriv=False
            ),
            'cand_cand': degp_utils.differences_by_dim_func(
                candidate_points_norm, candidate_points_norm, gp[0].n_order, return_deriv=False
            ),
            'domain_cand': degp_utils.differences_by_dim_func(
                integration_points_norm, candidate_points_norm, gp[0].n_order, return_deriv=False
            ),
            'train_domain': degp_utils.differences_by_dim_func(
                gp[0].x_train, integration_points_norm, gp[0].n_order, return_deriv=False
            )
        }

        # Score the shared candidate set under every GP (time step).
        for gp_t, params_t in zip(gp, params):
            scores_t = acquisition_func(
                gp_t,
                params_t,
                precomputed_diffs,
                n_integration_points = n_integration_points
            )
            cand_scores_all.append(scores_t.reshape(-1, 1))

        # Aggregate across time steps by averaging the per-GP scores.
        cand_scores = np.mean(np.hstack(cand_scores_all), axis=1)

        # --- Pick top starting points from aggregated candidate scores ---
        top_indices = np.argsort(cand_scores.ravel())[-n_local_starts:]
        top_starting_points = candidate_points[top_indices]

        if local_opt:
            # --- Get bounds for local optimization ---
            bounds = utils.get_optimization_bounds(dist_params)
            all_unbounded = all(b[0] is None for b in bounds)

            # --- Define objective function for local refinement ---
            def objective_function(x: np.ndarray) -> float:
                """
                Negated aggregated acquisition score at a single point x.

                Note: Can't use precomputed diffs here since x is varying
                during optimization.
                """
                x_cand = x.reshape(1, -1)

                # Normalize the candidate point if needed
                if gp[0].normalize:
                    x_cand_norm = utils.normalize_x_data_test(x_cand.copy(), sigmas_x, mus_x)
                else:
                    x_cand_norm = x_cand.copy()

                # Compute differences for this single candidate
                diff_train_cand = degp_utils.differences_by_dim_func(
                    gp[0].x_train, x_cand_norm, gp[0].n_order, return_deriv=False
                )
                diff_cand_cand = degp_utils.differences_by_dim_func(
                    x_cand_norm, x_cand_norm, gp[0].n_order, return_deriv=False
                )
                diff_domain_cand = degp_utils.differences_by_dim_func(
                    integration_points_norm, x_cand_norm, gp[0].n_order, return_deriv=False
                )

                # Precomputed diffs for this candidate
                local_precomputed_diffs = {
                    'train_cand': diff_train_cand,
                    'cand_cand': diff_cand_cand,
                    'domain_cand': diff_domain_cand,
                    'train_domain': precomputed_diffs['train_domain']  # Reuse this one
                }

                # Compute aggregated score across all GPs
                score_list = [
                    acquisition_func(
                        gp_t,
                        params_t,
                        precomputed_diffs=local_precomputed_diffs,
                        n_integration_points = n_integration_points
                    )
                    for gp_t, params_t in zip(gp, params)
                ]
                # Negated: scipy minimizes, but we maximize the score.
                return -np.mean([s[0] if isinstance(s, np.ndarray) else s for s in score_list])

            # --- Local optimization loop ---
            best_x, best_score = None, -np.inf
            for start_point in top_starting_points:
                if all_unbounded:
                    res = minimize(fun=objective_function, x0=start_point, method="BFGS")
                else:
                    res = minimize(fun=objective_function, x0=start_point, method="L-BFGS-B", bounds=bounds)
                # res.fun is the negated score; undo the sign to compare.
                if -res.fun > best_score:
                    best_score, best_x = -res.fun, res.x

            best_x = best_x.reshape(1, -1)
            selected_points.append(best_x)
        else:
            # Pure candidate-set selection: take the argmax and remove it so
            # the next batch iteration cannot pick the same point again.
            top_index = np.argmax(cand_scores.ravel())
            best_x = candidate_points[top_index].reshape(1, -1)
            selected_points.append(best_x)
            candidate_points = np.delete(candidate_points, top_index, axis=0)

        # Augment each GP with predicted outputs at best_x (hallucinated
        # observations) and rebuild it so later batch iterations treat the
        # selected point as training data.
        for t, gp_t in enumerate(gp):
            params_t = params[t]
            y_pred = gp_t.predict(best_x, params_t, calc_cov=False, return_deriv=True)
            y_to_add = utils._format_y_to_add(y_pred)

            x_aug = np.vstack([gp_t.x_train_input, best_x])
            y_aug = []
            for i_y, y_block in enumerate(gp_t.y_train_input):
                if i_y < len(y_to_add):
                    y_aug.append(np.vstack([y_block, y_to_add[i_y]]))
                else:
                    # No predicted value for this output block; keep it as-is.
                    y_aug.append(y_block)

            gp[t] = gp_t.__class__(
                x_aug, y_aug,
                gp_t.n_order,
                n_bases=gp_t.n_bases,
                der_indices=gp_t.der_indices,
                normalize=gp_t.normalize,
                kernel=gp_t.kernel,
                kernel_type=gp_t.kernel_type,
            )

    return np.vstack(selected_points)
191# def imse_reduction_verify(gp, params, x_train, y_train_list,der_indices, candidate_points, full_domain, noise_var=None):
192# """
193# Brute-force IMSE verification using temporary DE-GP for each candidate point.
194# Handles flattened y_train_candidates output from gp.predict correctly.
196# Args:
197# gp: Original degp object.
198# params: Kernel hyperparameters.
199# y_train_list: List of arrays (function + derivatives) for the original training points.
200# candidate_points: Candidate points to evaluate (n_candidates, d).
201# full_domain: Domain points for IMSE computation (n_domain, d).
202# noise_var: Optional noise variance for function observations.
204# Returns:
205# imse_brute: Array of IMSE reductions for each candidate.
206# """
207# n_candidates = candidate_points.shape[0]
208# n_domain = full_domain.shape[0]
209# n_derivs = len(y_train_list) - 1 # number of derivatives
210# imse_brute = np.zeros(n_candidates)
212# # 1. Original GP posterior variance at full_domain (function values only)
213# _, sigma2_orig = gp.predict(
214# full_domain, params, calc_cov=True, return_deriv=False)
215# sigma2_orig = np.maximum(sigma2_orig, 0.0)
217# # 2. Predict all function + derivatives over full domain
218# y_train_candidates_flat = gp.predict(
219# full_domain, params, calc_cov=False, return_deriv=True)
221# for i_cand, x_cand in enumerate(candidate_points):
222# # 3. Extract candidate outputs from the flattened array
223# y_cand_list = []
224# for k in range(n_derivs + 1): # include function
225# start = k * n_domain + i_cand
226# end = start + 1
227# y_cand_list.append(y_train_candidates_flat[start:end])
229# # 4. Construct temporary training outputs (list of arrays)
230# y_train_temp = []
231# for j in range(len(y_train_list)):
232# y_train_temp.append(np.vstack([y_train_list[j], y_cand_list[j]]))
234# # 5. Construct augmented training inputs
235# X_train_temp = np.vstack([x_train.copy(), x_cand])
237# # 6. Initialize temporary GP with augmented training data
238# gp_temp = degp(
239# X_train_temp, y_train_temp, gp.n_order, gp.n_bases,
240# normalize=gp.normalize,
241# kernel=gp.kernel,
242# der_indices=gp.der_indices,
243# kernel_type=gp.kernel_type
244# )
246# # 7. Predict posterior variance at full domain (function values only)
247# _, sigma2_new = gp_temp.predict(
248# full_domain, params, calc_cov=True, return_deriv=False)
249# sigma2_new = np.maximum(sigma2_new, 0.0)
251# # 8. Compute IMSE reduction for this candidate
252# imse_brute[i_cand] = np.mean((sigma2_orig - sigma2_new))
253# return imse_brute
@profile
def mse_reduction(
    gp, params, precomputed_diffs,
    noise_var=None, return_deriv=False, **kwargs
):
    """
    Posterior variance (MSE) of the GP at the candidate points.

    Picking the candidate with the largest posterior variance is the classic
    "maximum MSE reduction" active-learning criterion.

    Args:
        gp: GP object providing training-point differences
            (``gp.differences_by_dim``) and kernel configuration.
        params: Kernel hyperparameters; ``params[:-1]`` are length scales and
            ``params[-1]`` is log10 of the noise standard deviation.
        precomputed_diffs: Dict with precomputed difference matrices:
            - 'train_cand': differences between training and candidate points
            - 'cand_cand': differences between candidate points
        noise_var: Unused; kept for signature compatibility with the other
            acquisition functions.
        return_deriv: Whether to include derivative blocks in the candidate
            kernels.
        **kwargs: Ignored (absorbs e.g. ``n_integration_points`` passed by
            the batch driver).

    Returns:
        1D array of posterior variances at the candidate points.
    """
    length_scales = params[:-1]
    sigma_n = params[-1]

    # Training kernel (with derivative blocks) plus noise on the diagonal;
    # sigma_n is stored on a log10 scale.
    diff_train_train = gp.differences_by_dim
    K = degp_utils.rbf_kernel(
        diff_train_train, length_scales, gp.n_order, gp.n_bases, gp.kernel_func,
        gp.flattened_der_indicies, gp.powers, return_deriv=True
    )
    K += (10**sigma_n) ** 2 * np.eye(K.shape[0])

    # Cross- and candidate-covariances from the precomputed differences.
    diff_train_cand = precomputed_diffs['train_cand']
    diff_cand_cand = precomputed_diffs['cand_cand']

    K_s = degp_utils.rbf_kernel(
        diff_train_cand, length_scales, gp.n_order, gp.n_bases, gp.kernel_func,
        gp.flattened_der_indicies, gp.powers, return_deriv=return_deriv
    )
    n = K_s.shape[1]
    K_ss = degp_utils.rbf_kernel(
        diff_cand_cand, length_scales, gp.n_order, gp.n_bases, gp.kernel_func,
        gp.flattened_der_indicies, gp.powers, return_deriv=return_deriv
    )

    # Posterior variance diag(K_ss - K_s^T K^{-1} K_s), computed without
    # materializing the full posterior covariance:
    # diag(A - B^T C^{-1} B) = diag(A) - sum(B * (C^{-1} B), axis=0).
    if return_deriv:
        v = np.linalg.solve(K, K_s)
        return np.diag(K_ss) - np.sum(K_s * v, axis=0)
    K_sf = K_s[:, :n]
    v = np.linalg.solve(K, K_sf)
    return np.diag(K_ss[:n, :n]) - np.sum(K_sf * v, axis=0)
def imse_reduction(
    gp, params, precomputed_diffs,
    noise_var=None, return_deriv=False, **kwargs
):
    """
    Vectorized exact IMSE reduction with precomputed differences.

    For each candidate x, computes the average (over the integration grid)
    reduction in posterior variance that adding x to the training set would
    produce, via the rank-one update
        reduction(s, x) = k_post(s, x)^2 / k_post(x, x).

    Args:
        gp: GP object providing training-point differences
            (``gp.differences_by_dim``) and kernel configuration.
        params: Kernel hyperparameters; ``params[:-1]`` are length scales and
            ``params[-1]`` is log10 of the noise standard deviation.
        precomputed_diffs: Dict with precomputed difference matrices:
            - 'train_domain': differences between training and integration points
            - 'train_cand': differences between training and candidate points
            - 'cand_cand': differences between candidate points
            - 'domain_cand': differences between integration and candidate points
        noise_var: Unused; kept for signature compatibility with the other
            acquisition functions.
        return_deriv: Whether to include derivative blocks in the kernels.
        **kwargs: Must contain ``n_integration_points`` (number of integration
            rows to keep in the domain-candidate kernel).

    Returns:
        1D array of IMSE reductions, one per candidate point.

    Raises:
        KeyError: If ``n_integration_points`` is missing from kwargs.
    """
    length_scales = params[:-1]
    sigma_n = params[-1]
    n_integration_points = kwargs['n_integration_points']

    # Training kernel with noise on the diagonal; sigma_n is log10-scaled.
    diff_train_train = gp.differences_by_dim
    K = degp_utils.rbf_kernel(
        diff_train_train, length_scales, gp.n_order, gp.n_bases, gp.kernel_func,
        gp.flattened_der_indicies, gp.powers, return_deriv=True
    )
    K += (10**sigma_n) ** 2 * np.eye(K.shape[0])

    # Use precomputed differences
    diff_train_domain = precomputed_diffs['train_domain']
    diff_train_cand = precomputed_diffs['train_cand']
    diff_cand_cand = precomputed_diffs['cand_cand']
    diff_domain_cand = precomputed_diffs['domain_cand']

    # Compute kernels from differences
    K_s = degp_utils.rbf_kernel(
        diff_train_domain, length_scales, gp.n_order, gp.n_bases, gp.kernel_func,
        gp.flattened_der_indicies, gp.powers, return_deriv=return_deriv
    )
    K_train_cand = degp_utils.rbf_kernel(
        diff_train_cand, length_scales, gp.n_order, gp.n_bases, gp.kernel_func,
        gp.flattened_der_indicies, gp.powers, return_deriv=return_deriv
    )
    # Only the candidate self-variances are needed, hence the diag.
    K_cand_cand = np.diag(degp_utils.rbf_kernel(
        diff_cand_cand, length_scales, gp.n_order, gp.n_bases, gp.kernel_func,
        gp.flattened_der_indicies, gp.powers, return_deriv=return_deriv
    ))
    K_domain_cand = degp_utils.rbf_kernel(
        diff_domain_cand, length_scales, gp.n_order, gp.n_bases, gp.kernel_func,
        gp.flattened_der_indicies, gp.powers, return_deriv=return_deriv
    )  # shape: (n_domain, n_candidates)

    # Solve K^{-1} @ K_train_cand for all candidates at once.
    v = np.linalg.solve(K, K_train_cand)  # shape: (n_train, n_candidates)

    # Squared posterior cross-covariance between domain and candidates.
    numerator = (K_domain_cand[:n_integration_points, :] - K_s.T @ v) ** 2  # (n_domain, n_candidates)
    # Posterior variance at each candidate; clamp away from zero so that
    # candidates numerically coincident with training points do not blow
    # the division up. (This clamp was previously lost into a comment.)
    denominator = K_cand_cand - np.sum(K_train_cand * v, axis=0)  # (n_candidates,)
    denominator = np.maximum(denominator, 1e-16)

    variance_reduction = numerator / denominator  # (n_domain, n_candidates)
    # IMSE reduction for each candidate = mean over domain points.
    imse_reductions = variance_reduction.mean(axis=0)  # (n_candidates,)
    return imse_reductions