Coverage for jetgp/utils.py: 51%
697 statements
« prev ^ index » next coverage.py v7.10.7, created at 2026-04-03 15:14 -0500
« prev ^ index » next coverage.py v7.10.7, created at 2026-04-03 15:14 -0500
1import numpy as np
2import pyoti.core as coti
3import pyoti.sparse as oti
4import sympy as sp
5from scipy.stats import qmc
6from scipy.optimize import minimize
7from scipy.stats import norm
8import sys
9from scipy.linalg import null_space
10#from full_gddegp import gddegp
11#from line_profiler import profile
12from scipy.stats.qmc import Sobol
13import scipy.stats as stats
14from fractions import Fraction
15from scipy.special import comb
16from numpy.polynomial import Polynomial
17import math
18#import utils
19from jetgp.full_degp import degp_utils
def scale_samples(samples, lower_bounds, upper_bounds):
    """
    Scale samples from the unit hypercube [0, 1]^n to user-defined bounds.

    Each ROW of `samples` is one sample; column j is mapped affinely from
    [0, 1] to [lower_bounds[j], upper_bounds[j]].

    Parameters:
    ----------
    samples : ndarray of shape (n_samples, n)
        A 2D array where each row is a sample in [0, 1]^n.
    lower_bounds : array-like of length n
        Lower bounds for each dimension.
    upper_bounds : array-like of length n
        Upper bounds for each dimension.

    Returns:
    -------
    ndarray of shape (n_samples, n)
        Scaled samples where column j is mapped from [0, 1] to [lb_j, ub_j].

    Raises:
    -------
    ValueError
        If the number of columns in `samples` does not match the length of
        the bound vectors.

    Example:
    --------
    >>> samples = np.array([[0.5, 0.2], [0.8, 0.4]])
    >>> scale_samples(samples, [0, 1], [1, 3])
    array([[0.5, 1.4],
           [0.8, 1.8]])
    """
    samples = np.asarray(samples)
    lower_bounds = np.asarray(lower_bounds)
    upper_bounds = np.asarray(upper_bounds)

    # Explicit exception instead of `assert`, which is stripped under -O.
    if not (samples.shape[1] == len(lower_bounds) == len(upper_bounds)):
        raise ValueError("Dimension mismatch between samples and bounds")

    # Reshape bounds to broadcast across rows (one bound per column).
    lb = lower_bounds[np.newaxis, :]
    ub = upper_bounds[np.newaxis, :]

    return lb + samples * (ub - lb)
def flatten_der_indices(indices):
    """
    Flatten a nested list of derivative indices into a single flat list.

    Parameters:
    ----------
    indices : list of lists
        Nested list where each sublist holds derivative index entries.

    Returns:
    -------
    list
        All derivative index entries, in order, in one flat list.

    Example:
    --------
    >>> flatten_der_indices([[[1, 1]], [[1, 2], [2, 1]]])
    [[1, 1], [1, 2], [2, 1]]
    """
    return [entry for group in indices for entry in group]
def nrmse(y_true, y_pred, norm_type="minmax"):
    """
    Normalized Root Mean Squared Error (NRMSE) between truth and prediction.

    Parameters:
    ----------
    y_true : array-like
        Reference values.
    y_pred : array-like
        Predicted values.
    norm_type : str, default="minmax"
        Normalization of the RMSE:
        - 'minmax': divide by max(y_true) - min(y_true).
        - 'mean':   divide by mean(y_true).
        - 'std':    divide by std(y_true).

    Returns:
    -------
    float
        The normalized RMSE; `np.inf` when the normalizer is zero.

    Raises:
    -------
    ValueError
        If `norm_type` is not one of {'minmax', 'mean', 'std'}.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    rmse = np.sqrt(np.mean(np.square(y_true - y_pred)))

    if norm_type == "minmax":
        scale = np.max(y_true) - np.min(y_true)
    elif norm_type == "mean":
        scale = np.mean(y_true)
    elif norm_type == "std":
        scale = np.std(y_true)
    else:
        raise ValueError("norm_type must be 'minmax', 'mean', or 'std'")

    # Guard against a degenerate (constant) reference signal.
    if scale == 0:
        return np.inf
    return rmse / scale
def convert_index_to_exponent_form(lst):
    """
    Run-length encode a list of variable indices into exponent form.

    Consecutive identical entries are collapsed into [value, count] pairs,
    where `count` is the number of consecutive occurrences.

    Parameters:
    ----------
    lst : list of int
        Variable indices, with repeats indicating exponents.

    Returns:
    -------
    list of [int, int]
        [value, count] pairs, one per run of equal values.

    Example:
    --------
    >>> convert_index_to_exponent_form([1, 1, 2, 2, 2, 3])
    [[1, 2], [2, 3], [3, 1]]
    """
    runs = []
    for value in lst:
        # Extend the current run if the value repeats, else open a new one.
        if runs and runs[-1][0] == value:
            runs[-1][1] += 1
        else:
            runs.append([value, 1])
    return runs
def build_companion_array(nvars, order, der_indices):
    """
    Map each derivative index to its order in the OTI basis.

    The resulting array marks the order of each component in the OTI
    (Order-Truncated Imaginary) number system: 0 for the function value,
    and the derivative order for every derivative term.

    Parameters:
    ----------
    nvars : int
        Number of input variables.
    order : int
        Maximum derivative order considered.
    der_indices : list of lists
        Nested derivative indices in exponent form, grouped per block.

    Returns:
    -------
    companion_array : ndarray
        1D array: leading 0 for the function value, then one entry per
        derivative term giving its order.

    Example:
    --------
    >>> build_companion_array(2, 2, [[[1, 1]], [[1, 2]], [[2, 1]]])
    array([0, 1, 2, 1])
    """
    orders = [0]  # leading 0 marks the function-value block
    orders.extend(
        compare_OTI_indices(nvars, order, term)
        for group in der_indices
        for term in group
    )
    return np.array(orders)
def build_companion_array_predict(nvars, order, der_indices):
    """
    Map each derivative index to its order in the OTI basis (prediction form).

    Same as `build_companion_array`, but `der_indices` here is a FLAT list of
    exponent-form terms (one per derivative), not grouped into sublists.

    Parameters:
    ----------
    nvars : int
        Number of input variables.
    order : int
        Maximum derivative order considered.
    der_indices : list
        Flat list of derivative multi-indices in exponent form.

    Returns:
    -------
    companion_array : ndarray
        1D array: leading 0 for the function value, then one entry per
        derivative term giving its order.

    Example:
    --------
    >>> build_companion_array_predict(2, 2, [[[1, 1]], [[1, 2]], [[2, 1]]])
    array([0, 1, 2, 1])
    """
    orders = [0]  # leading 0 marks the function-value block
    orders.extend(
        compare_OTI_indices(nvars, order, term) for term in der_indices
    )
    return np.array(orders)
def compare_OTI_indices(nvars, order, term_check):
    """
    Compare a given multi-index term against all basis terms in the OTI number system.

    This function searches through all OTI basis terms (multi-indices) up to the specified order
    and identifies the **order** of the matching term.

    Parameters:
    ----------
    nvars : int
        Number of variables (input dimensions).
    order : int
        Maximum derivative order considered.
    term_check : list of [int, int]
        The multi-index to check, given in exponent form [[var_index, exponent], ...].

    Returns:
    -------
    int
        The order of the term in the OTI basis (1, 2, ..., order).
        Returns -1 if no matching term is found.

    Example:
    --------
    >>> nvars = 2
    >>> order = 2
    >>> term_check = [[1, 2]]  # Represents ∂²/∂x₁²
    >>> compare_OTI_indices(nvars, order, term_check)
    2
    """
    # pyoti direction helper; provides the full multi-index of each basis
    # direction via get_fulldir (semantics per pyoti's API).
    dH = coti.get_dHelp()

    # Linear search over all basis directions, one derivative order at a time.
    for ordi in range(1, order + 1):
        # Number of basis directions of order `ordi` for `nvars` variables.
        nterms = coti.ndir_order(nvars, ordi)
        for idx in range(nterms):
            # Convert the raw direction (a list of variable indices with
            # repeats) into [value, count] exponent form for comparison.
            term = convert_index_to_exponent_form(dH.get_fulldir(idx, ordi))
            # array_equal handles nested-list comparison element-wise.
            if np.array_equal(term, term_check):
                return ordi
    # Sentinel: the term was not found in any order up to `order`.
    return -1
def gen_OTI_indices(nvars, order):
    """
    Generate the list of OTI (Order-Truncated Imaginary) basis indices in exponent form.

    For a given number of variables and maximum derivative order, this function produces the
    multi-index representations for all basis terms in the OTI number system.

    Parameters:
    ----------
    nvars : int
        Number of variables (input dimensions).
    order : int
        Maximum derivative order considered.

    Returns:
    -------
    list of lists
        A nested list where:
        - The outer list has length `order` (one entry per derivative order).
        - Each inner list contains multi-indices in exponent form for that order.

    Example:
    --------
    >>> nvars = 2
    >>> order = 2
    >>> gen_OTI_indices(nvars, order)
    [
        [[[1, 1]], [[2, 1]]],  # First-order derivatives: ∂/∂x₁, ∂/∂x₂
        # Second-order: ∂²/∂x₁², ∂²/∂x₁∂x₂, ∂²/∂x₂²
        [[[1, 2]], [[1, 1], [2, 1]], [[2, 2]]]
    ]
    """
    # pyoti direction helper used to enumerate basis directions.
    dH = coti.get_dHelp()

    # Pre-size the outer list: one slot per derivative order 1..order.
    ind = [0] * order

    for ordi in range(1, order + 1):
        # Number of basis directions of order `ordi` for `nvars` variables.
        nterms = coti.ndir_order(nvars, ordi)
        i = [0] * nterms
        for idx in range(nterms):
            # Exponent form, e.g. [1, 1, 2] -> [[1, 2], [2, 1]].
            i[idx] = convert_index_to_exponent_form(dH.get_fulldir(idx, ordi))
        ind[ordi - 1] = i
    return ind
def reshape_y_train(y_train):
    """
    Flatten and concatenate function values and derivative observations
    into a single 1D array.

    Parameters:
    ----------
    y_train : list of arrays
        A list where:
        - y_train[0] contains the function values.
        - y_train[1:], if present, contain derivative values for each
          derivative component.

    Returns:
    -------
    ndarray of shape (n_total,)
        Function values followed by all derivative blocks, flattened.

    Example:
    --------
    >>> y_train = [np.array([1.0, 2.0]), np.array([0.5, 1.0])]
    >>> reshape_y_train(y_train)
    array([1. , 2. , 0.5, 1. ])
    """
    # Single concatenation instead of repeated np.vstack inside a loop,
    # which reallocated the whole array once per derivative block.
    return np.concatenate([np.asarray(block).reshape(-1) for block in y_train])
def transform_cov(cov, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale the diagonal of a covariance matrix back to the original
    (unnormalized) variance of function values and derivatives.

    Parameters:
    ----------
    cov : ndarray of shape (n_total, n_total)
        GP covariance matrix (function values plus derivative blocks).
    sigma_y : float
        Standard deviation used to normalize the function values.
    sigmas_x : ndarray of shape (1, nvars)
        Standard deviations used to normalize each input dimension.
    der_indices : list of lists
        Derivative multi-indices in exponent form, one per derivative block.
    X_test : ndarray of shape (n_samples, nvars)
        Test points; each block of the diagonal has `n_samples` entries.

    Returns:
    -------
    ndarray of shape (n_total,)
        Rescaled variances in the original space.
    """
    n = X_test.shape[0]
    variances = abs(np.diag(cov))
    rescaled = np.zeros((variances.shape[0],))

    # Function-value block: variance scales with sigma_y^2.
    rescaled[:n] = variances[:n] * sigma_y ** 2

    for i, multi_index in enumerate(der_indices):
        # Chain rule for variances: each differentiation w.r.t. x_j of
        # order e divides by sigma_x_j^e, so the variance divides by the
        # squared product.
        denom = 1
        for var_idx, exponent in multi_index:
            denom = denom * sigmas_x[0][var_idx - 1] ** exponent
        block = slice((i + 1) * n, (i + 2) * n)
        rescaled[block] = variances[block] * (sigma_y ** 2 / denom ** 2)
    return rescaled
def transform_cov_directional(cov, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale the diagonal of a covariance matrix for function values and
    directional derivatives.

    Unlike `transform_cov`, the derivatives here are directional, so no
    input scaling (`sigmas_x`) is applied to the derivative blocks; every
    block is rescaled by sigma_y**2 alone.

    Parameters:
    ----------
    cov : ndarray of shape (n_total, n_total)
        GP covariance matrix (function values plus directional-derivative blocks).
    sigma_y : float
        Standard deviation used to normalize the function values.
    sigmas_x : ndarray of shape (1, nvars)
        Input standard deviations (unused; kept for interface symmetry).
    der_indices : list of lists
        Directional-derivative directions (only the count is used here).
    X_test : ndarray of shape (n_samples, nvars)
        Test points; each block of the diagonal has `n_samples` entries.

    Returns:
    -------
    ndarray of shape (n_total,)
        Rescaled variances in the original space.
    """
    n = X_test.shape[0]
    variances = abs(np.diag(cov))
    rescaled = np.zeros((variances.shape[0],))

    rescaled[:n] = variances[:n] * sigma_y ** 2
    scale = sigma_y ** 2
    for i in range(len(der_indices)):
        block = slice((i + 1) * n, (i + 2) * n)
        rescaled[block] = variances[block] * scale
    return rescaled
def transform_predictions(y_pred, mu_y, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale predicted function values and multi-index derivatives from
    normalized space back to the original scale.

    Parameters:
    ----------
    y_pred : ndarray of shape (n_total,)
        Predicted means in normalized space (function values followed by
        one block per derivative).
    mu_y : float
        Mean of the original function values.
    sigma_y : float
        Standard deviation of the original function values.
        NOTE(review): indexed as sigma_y[0, 0] below, so in practice a
        (1, 1) array is expected — confirm against callers.
    sigmas_x : ndarray of shape (1, nvars)
        Input standard deviations (for rescaling derivatives).
    der_indices : list of lists
        Derivative multi-indices in exponent form, one per block.
    X_test : ndarray of shape (n_samples, nvars)
        Test points; each prediction block has `n_samples` entries.

    Returns:
    -------
    ndarray of shape (n_total, 1)
        Rescaled function values and derivatives.
    """
    n = X_test.shape[0]
    # Undo output standardization for the function-value block.
    result = (mu_y + y_pred[:n] * sigma_y).reshape(-1, 1)

    for i, multi_index in enumerate(der_indices):
        # Chain rule: each differentiation w.r.t. x_j of order e divides
        # the scale by sigma_x_j^e.
        factor = sigma_y
        for var_idx, exponent in multi_index:
            factor = factor / sigmas_x[0][var_idx - 1] ** exponent
        block = (y_pred[(i + 1) * n:(i + 2) * n] * factor[0, 0]).reshape(-1, 1)
        result = np.vstack((result, block))
    return result
def transform_predictions_directional(y_pred, mu_y, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale predicted function values and directional derivatives from
    normalized space back to the original scale.

    Directional derivatives only pick up the output scale (sigma_y);
    input scaling is not applied here.

    Parameters:
    ----------
    y_pred : ndarray of shape (n_total,)
        Predicted means in normalized space (function values followed by
        one block per direction).
    mu_y : float
        Mean of the original function values.
    sigma_y : float
        Standard deviation of the original function values.
        NOTE(review): indexed as sigma_y[0, 0] below, so in practice a
        (1, 1) array is expected — confirm against callers.
    sigmas_x : ndarray of shape (1, nvars)
        Input standard deviations (unused; kept for interface symmetry).
    der_indices : list of lists
        Directional-derivative directions (only the count is used here).
    X_test : ndarray of shape (n_samples, nvars)
        Test points; each prediction block has `n_samples` entries.

    Returns:
    -------
    ndarray of shape (n_total, 1)
        Rescaled function values and directional derivatives.
    """
    n = X_test.shape[0]
    # Undo output standardization for the function-value block.
    result = (mu_y + y_pred[:n] * sigma_y).reshape(-1, 1)

    for i in range(len(der_indices)):
        factor = sigma_y
        block = (y_pred[(i + 1) * n:(i + 2) * n] * factor[0, 0]).reshape(-1, 1)
        result = np.vstack((result, block))
    return result
def normalize_x_data_test(X_test, sigmas_x, mus_x):
    """
    Normalize test inputs with the training-set mean and standard deviation.

    Parameters:
    ----------
    X_test : ndarray of shape (n_samples, nvars)
        Test input points to normalize.
    sigmas_x : ndarray of shape (1, nvars)
        Per-variable standard deviations from the training inputs.
    mus_x : ndarray of shape (1, nvars)
        Per-variable means from the training inputs.

    Returns:
    -------
    ndarray of shape (n_samples, nvars)
        Normalized test inputs.

    Example:
    --------
    >>> normalize_x_data_test(np.array([[2.0, 3.0]]),
    ...                       np.array([[1.0, 2.0]]),
    ...                       np.array([[0.0, 1.0]]))
    array([[2., 1.]])
    """
    centered = X_test - mus_x
    return centered / sigmas_x
def normalize_x_data_train(X_train):
    """
    Normalize training inputs to zero mean and unit standard deviation
    per variable (column-wise).

    Parameters:
    ----------
    X_train : ndarray of shape (n_samples, nvars)
        Training input points.

    Returns:
    -------
    ndarray of shape (n_samples, nvars)
        Normalized training inputs.

    Notes:
    -----
    Only the normalized array is returned; the per-column mean and std
    are computed internally and discarded (use `normalize_y_data` if the
    statistics are needed).

    Example:
    --------
    >>> normalize_x_data_train(np.array([[1.0, 2.0], [3.0, 4.0]]))
    array([[-1., -1.],
           [ 1.,  1.]])
    """
    mu = np.mean(X_train, axis=0).reshape(1, -1)     # per-column mean, shape (1, nvars)
    sigma = np.std(X_train, axis=0).reshape(1, -1)   # per-column std, shape (1, nvars)
    return (X_train - mu) / sigma
def normalize_directions(sigmas_x, rays):
    """
    Rescale direction vectors (rays) into normalized input space.

    Parameters:
    ----------
    sigmas_x : ndarray of shape (1, nvars)
        Per-variable standard deviations of the inputs.
    rays : ndarray of shape (nvars, n_directions)
        Direction vectors (one per column) in the original input space.

    Returns:
    -------
    ndarray of shape (nvars, n_directions)
        Normalized direction vectors.

    Example:
    --------
    >>> normalize_directions(np.array([[2.0, 1.0]]),
    ...                      np.array([[1.0, 0.0], [0.0, 1.0]]))
    array([[0.5, 0. ],
           [0. , 1. ]])
    """
    # diag(1/sigma) @ ray is just a per-row scaling by the reciprocal
    # sigmas, so apply it to all columns at once via broadcasting.
    inv_scales = 1 / sigmas_x.flatten()
    return inv_scales[:, None] * rays
664# def normalize_directions_2(sigmas_x, rays):
665# """
666# Normalize direction vectors (rays) based on input scaling.
668# This function rescales direction vectors used for **directional derivatives** so that
669# they are consistent with normalized input space.
671# Parameters:
672# ----------
673# sigmas_x : ndarray of shape (1, nvars)
674# Standard deviations of the input variables (used for scaling each direction).
675# rays : ndarray of shape (nvars, n_directions)
676# Direction vectors (columns) in the original input space.
678# Returns:
679# -------
680# transformed_rays : ndarray of shape (nvars, n_directions)
681# Normalized direction vectors.
683# Example:
684# --------
685# >>> sigmas_x = np.array([[2.0, 1.0]])
686# >>> rays = np.array([[1.0, 0.0], [0.0, 1.0]])
687# >>> normalize_directions(sigmas_x, rays)
688# array([[0.5, 0. ],
689# [0. , 1. ]])
690# """
691# transformed_rays_list = []
692# for i in range(len(rays)):
694# transformed_rays = np.zeros(rays[i].shape)
695# for j in range(transformed_rays.shape[1]):
696# transformed_rays[:, j] = np.diag(
697# 1/sigmas_x.flatten()) @ rays[i][:, j]
698# transformed_rays_list.append(transformed_rays)
699# return transformed_rays_list
def normalize_directions_2(sigmas_x, rays_array):
    """
    Rescale a list of direction-vector arrays into normalized input space.

    Parameters:
    ----------
    sigmas_x : ndarray of shape (1, nvars)
        Per-variable standard deviations of the inputs.
    rays_array : sequence of ndarray, each of shape (nvars, n_directions)
        One array of direction vectors (columns) per training point/group.

    Returns:
    -------
    list of ndarray
        Each input array with every row divided by the matching sigma.

    Example:
    --------
    >>> normalize_directions_2(np.array([[2.0, 1.0]]), [np.eye(2)])
    [array([[0.5, 0. ],
            [0. , 1. ]])]
    """
    # Column vector of per-variable scales, broadcast over columns.
    scale = sigmas_x.flatten()[:, None]
    normalized = []
    for rays in rays_array:
        normalized.append(rays / scale)
    return normalized
def normalize_y_data(X_train, y_train, sigma_data, der_indices):
    """
    Normalize function values, derivatives, and observational noise for
    training data.

    This function:
    - Normalizes function values (`y_train[0]`) to zero mean / unit variance.
    - Scales derivatives via the chain rule, accounting for input normalization.
    - Scales the derivative blocks of the observational noise (`sigma_data`).

    Parameters:
    ----------
    X_train : ndarray of shape (n_samples, nvars)
        Training input points (used for input normalization statistics).
    y_train : list of arrays
        y_train[0] holds function values; y_train[1:], if present, hold
        derivative values for each derivative component.
    sigma_data : ndarray of shape (n_total,) or None
        Per-observation noise standard deviations. Left untouched; a scaled
        copy is returned.
    der_indices : list of lists
        Derivative multi-indices in exponent form, one per derivative block.

    Returns:
    -------
    y_train_normalized : ndarray of shape (n_total,)
        Normalized function values and derivatives (flattened).
    mean_vec_y : ndarray of shape (m, 1)
        Mean of the function values before normalization.
    std_vec_y : ndarray of shape (m, 1)
        Standard deviation of the function values before normalization.
    std_vec_x : ndarray of shape (1, nvars)
        Standard deviations of the input variables.
    mean_vec_x : ndarray of shape (1, nvars)
        Means of the input variables.
    noise_std_normalized : ndarray or None
        Scaled copy of `sigma_data` (None when `sigma_data` is None).
    """
    num_points = len(X_train)
    mean_vec_x = np.mean(X_train, axis=0).reshape(1, -1)  # shape: (1, nvars)
    std_vec_x = np.std(X_train, axis=0).reshape(1, -1)    # shape: (1, nvars)

    mean_vec_y = np.mean(y_train[0], axis=0).reshape(-1, 1)
    std_vec_y = np.std(y_train[0], axis=0).reshape(-1, 1)

    y_train_normalized = (y_train[0] - mean_vec_y) / std_vec_y

    # Work on a copy: the previous implementation scaled `sigma_data`
    # itself, silently mutating the caller's array.
    noise_std_normalized = None if sigma_data is None else np.array(sigma_data, copy=True)
    # NOTE(review): the function-value noise block (first num_points
    # entries) is returned unscaled, as in the original code — presumably
    # it should also be divided by std_vec_y; confirm against callers.

    for i in range(len(der_indices)):
        # Chain rule: a derivative of order e w.r.t. x_j scales by
        # std_x_j**e / std_y when both x and y are standardized.
        factor = 1 / std_vec_y
        for j in range(len(der_indices[i])):
            factor = factor * \
                (std_vec_x[0][der_indices[i][j][0] - 1]
                 ) ** (der_indices[i][j][1])
        y_train_normalized = np.vstack(
            (y_train_normalized.reshape(-1, 1), y_train[i + 1] * factor[0, 0]))
        if noise_std_normalized is not None:
            sl = slice((i + 1) * num_points, (i + 2) * num_points)
            noise_std_normalized[sl] = noise_std_normalized[sl] * factor[0, 0]

    return y_train_normalized.flatten(), mean_vec_y, std_vec_y, std_vec_x, mean_vec_x, noise_std_normalized
def normalize_y_data_directional(X_train, y_train, sigma_data, der_indices):
    """
    Normalize function values and directional derivatives for training data.

    This function:
    - Normalizes function values (`y_train[0]`) to zero mean / unit variance.
    - Scales directional derivatives (`y_train[1:]`) by 1 / std_vec_y only
      (input scaling is absorbed into the normalized direction vectors).
    - Scales the observational noise by 1 / std_vec_y.

    Parameters:
    ----------
    X_train : ndarray of shape (n_samples, nvars)
        Training input points (used for input normalization statistics).
    y_train : list of arrays
        y_train[0] holds function values; y_train[1:], if present, hold
        directional-derivative values for each direction.
    sigma_data : float or ndarray or None
        Observational noise standard deviation(s); scaled by 1 / std_vec_y.
    der_indices : list of lists
        Directions for directional derivatives (only the count is used).

    Returns:
    -------
    y_train_normalized : ndarray of shape (n_total,)
        Normalized function values and directional derivatives (flattened).
    mean_vec_y : ndarray of shape (m, 1)
        Mean of the function values before normalization.
    std_vec_y : ndarray of shape (m, 1)
        Standard deviation of the function values before normalization.
    std_vec_x : ndarray of shape (1, nvars)
        Standard deviations of the input variables.
    mean_vec_x : ndarray of shape (1, nvars)
        Means of the input variables.
    noise_std_normalized : float or ndarray or None
        Normalized noise (None when `sigma_data` is None).
    """
    mean_vec_x = np.mean(X_train, axis=0).reshape(1, -1)  # shape: (1, nvars)
    std_vec_x = np.std(X_train, axis=0).reshape(1, -1)    # shape: (1, nvars)

    mean_vec_y = np.mean(y_train[0], axis=0).reshape(-1, 1)
    std_vec_y = np.std(y_train[0], axis=0).reshape(-1, 1)

    y_train_normalized = (y_train[0] - mean_vec_y) / std_vec_y

    # Directional derivatives only pick up the output scale; the factor is
    # loop-invariant, so hoist it.
    factor = 1 / std_vec_y
    for i in range(len(der_indices)):
        y_train_normalized = np.vstack(
            (y_train_normalized.reshape(-1, 1), y_train[i + 1] * factor[0, 0]))

    # Computed once, outside the loop, so the result is well-defined even
    # when `der_indices` is empty.
    if sigma_data is not None:
        noise_std_normalized = sigma_data / std_vec_y[0, 0]
    else:
        noise_std_normalized = None

    return y_train_normalized.flatten(), mean_vec_y, std_vec_y, std_vec_x, mean_vec_x, noise_std_normalized
def generate_submodel_noise_matricies(sigma_data, index, der_indices, num_points, base_der_indices):
    """
    Generate diagonal noise covariance matrices for each submodel component
    (including function values and their associated derivatives).

    This function constructs a list of diagonal matrices where each matrix corresponds
    to a specific group of training indices (e.g., for a submodel), and includes
    both function value noise and associated derivative noise.

    Parameters
    ----------
    sigma_data : ndarray of shape (n_total, n_total)
        Full covariance (typically diagonal) matrix for all training data, including function values and derivatives.
    index : list of lists of int
        List where each sublist contains indices of the function values for one submodel (e.g., a cluster or partition).
        NOTE(review): each sublist is indexed as idx[j] per derivative below,
        so it appears to actually be a list of per-derivative index lists —
        confirm against callers.
    der_indices : list of lists
        Each sublist contains derivative directions for the submodel, corresponding to base_der_indices.
    num_points : int
        Number of training points per function (used to compute index offsets for derivatives).
    base_der_indices : list of lists
        Master list of derivative indices that define the ordering of blocks in the covariance matrix.

    Returns
    -------
    sub_model_matricies : list of ndarray
        List of diagonal noise matrices (ndarray of shape (n_submodel_total, n_submodel_total)) for each submodel,
        combining noise contributions from function values and all applicable derivative components.

    Raises
    ------
    Exception
        If a derivative index in `der_indices` is not found in `base_der_indices`.
        NOTE(review): the `continue` in the for-else makes the raise below
        unreachable; unmatched derivatives are silently skipped instead.

    Example
    -------
    >>> sigma_data.shape = (300, 300)
    >>> index = [[0, 1, 2], [3, 4, 5]]
    >>> der_indices = [[[1, 1]], [[2, 1], [1, 2]]]
    >>> base_der_indices = [[[1, 1]], [[2, 1]], [[1, 2]]]
    >>> num_points = 100
    >>> generate_submodel_noise_matricies(sigma_data, index, der_indices, num_points, base_der_indices)
    [array of shape (6, 6), array of shape (9, 9)]
    """
    sub_model_matricies = []
    for i, idx in enumerate(index):
        # Every submodel starts with the function-value noise variances
        # (the diagonal of the leading num_points x num_points block).
        values = np.diag(sigma_data[:num_points, :num_points])
        for j in range(len(der_indices[i])):
            # Find this derivative's position k in the master ordering;
            # that position determines its block offset in sigma_data.
            for k, item in enumerate(base_der_indices):
                if item == der_indices[i][j]:
                    scale_factor = k
                    break
            else:
                # for-else: no match found — mark and skip this derivative.
                scale_factor = -1
                continue

            # NOTE(review): unreachable — the unmatched case `continue`s
            # above, so scale_factor is never -1 here.
            if scale_factor == -1:
                raise Exception('Unknown Error')
            # Block offset: block 0 is function values, so derivative k
            # lives at offset (k + 1) * num_points.
            scale_factor = k + 1
            indices = (scale_factor*num_points)+np.array(idx[j])
            if len(indices) == 0:
                # No training points contribute this derivative block.
                pass
            else:
                # Fancy indexing with (indices, indices) picks the diagonal
                # entries of sigma_data at those positions.
                values = np.concatenate(
                    (values, sigma_data[indices[0:], indices[0:]].flatten()))
        sub_model_matricies.append(np.diag(values))

    return sub_model_matricies
def generate_submodel_noise_matricies_old(sigma_data, index, der_indices, num_points, base_der_indices):
    """
    Generate diagonal noise covariance matrices for each submodel component
    (including function values and their associated derivatives).

    This function constructs a list of diagonal matrices where each matrix corresponds
    to a specific group of training indices (e.g., for a submodel), and includes
    both function value noise and associated derivative noise.

    Parameters
    ----------
    sigma_data : ndarray of shape (n_total, n_total)
        Full covariance (typically diagonal) matrix for all training data, including function values and derivatives.
    index : list of lists of int
        List where each sublist contains indices of the function values for one submodel (e.g., a cluster or partition).
    der_indices : list of lists
        Each sublist contains derivative directions for the submodel, corresponding to base_der_indices.
    num_points : int
        Number of training points per function (used to compute index offsets for derivatives).
    base_der_indices : list of lists
        Master list of derivative indices that define the ordering of blocks in the covariance matrix.

    Returns
    -------
    sub_model_matricies : list of ndarray
        List of diagonal noise matrices (ndarray of shape (n_submodel_total, n_submodel_total)) for each submodel,
        combining noise contributions from function values and all applicable derivative components.

    Raises
    ------
    Exception
        If a derivative index in `der_indices` is not found in `base_der_indices`.

    Example
    -------
    >>> sigma_data.shape = (300, 300)
    >>> index = [[0, 1, 2], [3, 4, 5]]
    >>> der_indices = [[[1, 1]], [[2, 1], [1, 2]]]
    >>> base_der_indices = [[[1, 1]], [[2, 1]], [[1, 2]]]
    >>> num_points = 100
    >>> generate_submodel_noise_matricies(sigma_data, index, der_indices, num_points, base_der_indices)
    [array of shape (6, 6), array of shape (9, 9)]
    """
    sub_model_matricies = []
    for i, idx in enumerate(index):
        # Start every submodel with the function-value noise (first num_points entries).
        values = np.diag(sigma_data[:num_points, :num_points])
        for j in range(len(der_indices[i])):
            # Locate this derivative direction in the master ordering.
            # NOTE: the original implementation `continue`d the outer loop when
            # the direction was missing, making the documented raise unreachable;
            # the lookup failure now raises as the docstring promises.
            scale_factor = -1
            for k, item in enumerate(base_der_indices):
                if item == der_indices[i][j]:
                    # Block k in sigma_data sits at offset (k + 1) * num_points
                    # (block 0 holds the function values).
                    scale_factor = k + 1
                    break
            if scale_factor == -1:
                raise Exception('Unknown Error')
            indices = (scale_factor * num_points) + np.array(idx)
            # Fancy indexing with identical row/column arrays extracts the
            # diagonal noise entries of this derivative block.
            values = np.concatenate(
                (values, sigma_data[indices, indices].flatten()))
        sub_model_matricies.append(np.diag(values))

    return sub_model_matricies
def matern_kernel_builder(nu, oti_module=None):
    """
    Symbolically construct the Matérn kernel with smoothness ν and return it
    as a callable of the distance r.

    Parameters
    ----------
    nu : float
        Smoothness parameter; should be a half-integer (0.5, 1.5, 2.5, ...).
    oti_module : module, optional
        PyOTI static module supplying `exp`. If None, plain numpy is used.

    Returns
    -------
    callable
        Lambdified Matérn kernel k(r).
    """
    r = sp.symbols('r')
    # Force an exact rational ν so besselk simplifies to exp * polynomial.
    order = sp.Rational(2 * nu, 2)
    scaled_r = sp.sqrt(2 * order) * r
    prefactor = (2 ** (1 - order)) / sp.gamma(order)
    kernel_expr = sp.simplify(
        prefactor * scaled_r ** order * sp.simplify(sp.besselk(order, scaled_r)))

    if oti_module is None:
        return sp.lambdify(r, kernel_expr, modules=["numpy"])
    # Route exp through the OTI module so hyper-dual numbers propagate.
    return sp.lambdify(r, kernel_expr, modules=[{"exp": oti_module.exp}, "numpy"])
def matern_kernel_grad_builder(nu, oti_module=None):
    """
    Build df/dr of the Matérn kernel with smoothness ν as a callable.

    Parameters
    ----------
    nu : float
        Smoothness parameter (half-integer, e.g. 0.5, 1.5, 2.5).
    oti_module : module, optional
        PyOTI static module supplying `exp`. If None, plain numpy is used.

    Returns
    -------
    callable
        Lambdified derivative dk/dr as a function of the scaled distance r.
    """
    r = sp.symbols('r')
    # Exact rational ν so the Bessel function collapses to closed form.
    order = sp.Rational(2 * nu, 2)
    scaled_r = sp.sqrt(2 * order) * r
    kernel_expr = ((2 ** (1 - order)) / sp.gamma(order)
                   * scaled_r ** order * sp.simplify(sp.besselk(order, scaled_r)))
    derivative = sp.simplify(sp.diff(sp.simplify(kernel_expr), r))

    if oti_module is None:
        return sp.lambdify(r, derivative, modules=["numpy"])
    # Route exp through the OTI module for hyper-dual evaluation.
    return sp.lambdify(r, derivative, modules=[{"exp": oti_module.exp}, "numpy"])
def generate_bernoulli_numbers(n_max: int) -> list[Fraction]:
    """
    Generate the Bernoulli numbers B_0 .. B_n_max via their recurrence.

    Uses the identity sum_{k=0}^{n} C(n+1, k) * B_k = 0, solved for B_n,
    with exact Fraction arithmetic throughout.

    Args:
        n_max: Maximum order of the Bernoulli number to generate.

    Returns:
        List of Bernoulli numbers as Fraction objects (B_1 = -1/2 convention).
    """
    if n_max < 0:
        return []

    bernoulli = [Fraction(0)] * (n_max + 1)
    bernoulli[0] = Fraction(1)  # B_0 = 1 seeds the recurrence

    for n in range(1, n_max + 1):
        # B_n = - (1 / (n+1)) * sum_{k=0}^{n-1} C(n+1, k) * B_k
        partial = sum(
            (comb(n + 1, k, exact=True) * bernoulli[k] for k in range(n)),
            Fraction(0))
        # Odd-order Bernoulli numbers above B_1 vanish identically.
        bernoulli[n] = Fraction(0) if (n > 1 and n % 2) else -partial / (n + 1)

    return bernoulli
def generate_bernoulli_polynomial(alpha: int) -> Polynomial:
    """
    Generate the (2*alpha)-th Bernoulli polynomial B_{2*alpha}(x).

    The polynomial is sum_{k=0}^{n} C(n, k) * B_k * x^(n-k) with n = 2*alpha,
    so the coefficient of x^j is C(n, n-j) * B_{n-j}.

    Args:
        alpha: A non-negative integer.

    Returns:
        A numpy.polynomial.Polynomial holding the coefficients from lowest
        power (x^0) to highest (x^n).

    Raises:
        ValueError: If alpha is not a non-negative integer.
    """
    if not isinstance(alpha, int) or alpha < 0:
        raise ValueError("alpha must be a non-negative integer.")

    degree = 2 * alpha
    bernoulli_nums = generate_bernoulli_numbers(degree)

    # Fractions are converted to float only at the very end to keep the
    # binomial * Bernoulli products exact.
    coefficients = np.zeros(degree + 1, dtype=object)
    for power in range(degree + 1):
        order = degree - power
        coefficients[power] = float(
            comb(degree, order, exact=True) * bernoulli_nums[order])

    return Polynomial(coefficients)
def generate_bernoulli_lambda(alpha: int) -> callable:
    """
    Build a callable evaluating the scaled (2*alpha)-th Bernoulli polynomial.

    The polynomial is multiplied by (-1)^(alpha+1) * (2π)^(2α) / (2α)!,
    and the numpy Polynomial object returned is itself callable.

    Args:
        alpha: A non-negative integer.

    Returns:
        A callable evaluating the scaled B_{2*alpha}(x).
    """
    scale = ((-1) ** (alpha + 1)) * (2 * np.pi) ** (2 * alpha) / math.factorial(2 * alpha)
    return scale * generate_bernoulli_polynomial(alpha)
def robust_local_optimization(func, x0, args=(), lb=None, ub=None, debug=False):
    """
    Run L-BFGS-B and recover gracefully from abnormal line-search termination.

    Parameters
    ----------
    func : callable
        Objective function to minimize.
    x0 : array-like
        Initial guess.
    args : tuple, optional
        Extra positional arguments forwarded to func.
    lb, ub : array-like or None, optional
        Elementwise bounds; both must be given for bounds to be applied.
    debug : bool, optional
        Print diagnostics on abnormal termination.

    Returns
    -------
    result : OptimizeResult or RecoveredResult
        Always carries a boolean attribute ``recovered_from_abnormal`` so
        downstream checks (e.g. should_accept_local_result) can rely on it.
    """
    bounds = list(zip(lb, ub)) if lb is not None and ub is not None else None

    result = minimize(
        func, x0, args=args, method="L-BFGS-B", bounds=bounds,
        options={"disp": False, "maxiter": 1000, "gtol": 1e-7}
    )
    sys.stdout.flush()
    # BUGFIX: set the flag on every result, not only the abnormal path.
    # Previously a normally-terminated-but-unsuccessful result lacked the
    # attribute and consumers raised AttributeError when probing it.
    result.recovered_from_abnormal = False

    if "ABNORMAL_TERMINATION_IN_LNSRCH" in result.message:
        if debug:
            print(f"L-BFGS-B terminated abnormally: {result.message}")
            print("Using current point as recovered solution...")

        # Lightweight stand-in result wrapping the last iterate.
        class RecoveredResult:
            def __init__(self, x, fun, message):
                self.x = x
                self.fun = fun
                self.success = False
                self.message = message
                self.recovered_from_abnormal = True

        current_f = func(result.x, *args)
        return RecoveredResult(result.x, current_f, f"Recovered from abnormal termination: {result.message}")
    return result
def should_accept_local_result(local_res, current_best_f, is_feasible, debug=False):
    """
    Decide whether a local optimization result should replace the incumbent best.

    Parameters
    ----------
    local_res : object
        Result object with ``success``, ``x``, ``fun`` and optionally
        ``recovered_from_abnormal`` attributes.
    current_best_f : float
        Current best objective value to beat (strict improvement required).
    is_feasible : callable
        Predicate evaluated on ``local_res.x``.
    debug : bool, optional
        Print the rejection reason.

    Returns
    -------
    bool
        True only for a successful (or recovered) result that is feasible and
        strictly improves on current_best_f.
    """
    # BUGFIX: use getattr with a default — plain scipy OptimizeResult objects
    # have no recovered_from_abnormal attribute, which previously raised
    # AttributeError whenever success was False.
    if not local_res.success and not getattr(local_res, 'recovered_from_abnormal', False):
        return False

    if not is_feasible(local_res.x):
        if debug:
            print("Local optimization result is infeasible - rejecting")
        return False

    if local_res.fun >= current_best_f:
        if debug:
            print(
                "Local optimization did not improve:")
        return False

    return True
def jade(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={},
         pop_size=100, n_generations=100, p=0.1, c=0.1,
         minstep=1e-6, stagnation_limit=15, debug=False,
         local_opt_every=15, initial_positions=None, seed=42,
         local_optimizer=None, func_and_grad=None, grad_func=None):
    """
    JADE (Adaptive Differential Evolution) with optional local refinement
    and stagnation-based stopping criterion.

    JADE: Adaptive Differential Evolution With Optional External Archive
    https://ieeexplore.ieee.org/abstract/document/5208221

    Parameters
    ----------
    func : callable
        Objective function to minimize.
    lb, ub : array-like
        Lower and upper bounds.
    ieqcons : list
        List of inequality constraint functions.
    f_ieqcons : callable or None
        Single function returning array of constraint values.
    args : tuple
        Extra arguments passed to func.
    kwargs : dict
        Extra keyword arguments passed to func.
    pop_size : int
        Population size.
    n_generations : int
        Maximum number of generations.
    p : float
        Fraction of top individuals for p-best selection.
    c : float
        Learning rate for parameter adaptation.
    minstep : float
        Minimum position change to accept a new best (when improving).
    stagnation_limit : int
        Stop if no improvement for this many consecutive generations.
    debug : bool
        Print debug information.
    local_opt_every : int or None
        Run local optimization every this many generations. None to disable.
    initial_positions : array-like or None
        Initial positions to seed the population.
    seed : int
        Random seed.
    local_optimizer : callable or None
        Custom local optimizer called as local_optimizer(func, g_best, lb, ub).
    func_and_grad : callable or None
        Joint objective/gradient function used with jac=True in refinement.
    grad_func : callable or None
        Separate gradient function, combined with func for jac=True refinement.

    Returns
    -------
    g_best : ndarray
        Best feasible position found.
    f_best : float
        Objective value at g_best.
    """
    lb = np.asarray(lb)
    ub = np.asarray(ub)
    D = len(lb)
    # NOTE(review): seeds the *global* numpy RNG — affects other numpy users.
    np.random.seed(seed)

    # Constraint setup: all constraints use the convention cons(x) >= 0 feasible.
    if f_ieqcons is not None:
        def cons(x): return np.asarray(f_ieqcons(x, *args, **kwargs))
    elif ieqcons:
        # The comprehension variable `c` shadows the learning-rate parameter `c`
        # only inside the comprehension scope (Python 3), so adaptation is unaffected.
        def cons(x): return np.asarray([c(x, *args, **kwargs) for c in ieqcons])
    else:
        def cons(x): return np.array([0.0])

    def is_feasible(x):
        return np.all(cons(x) >= 0)

    # Initialize population using a scrambled Sobol sequence (low-discrepancy).
    sampler = qmc.Sobol(d=D, scramble=True, seed=seed)
    sobol_sample = sampler.random_base2(m=int(np.ceil(np.log2(pop_size))))
    pop = qmc.scale(sobol_sample[:pop_size], lb, ub)
    if initial_positions is not None:
        # User-supplied warm starts overwrite the first n_init Sobol points.
        n_init = min(len(initial_positions), pop_size)
        pop[:n_init] = initial_positions[:n_init]

    # Fitness and feasibility of the initial population
    fitness = np.array([func(ind, *args, **kwargs) for ind in pop])
    feasible = np.array([is_feasible(ind) for ind in pop])

    # Initialize global best: infeasible members get a large additive penalty.
    g_idx = np.argmin(fitness * feasible + (~feasible) * 1e10)
    g_best = pop[g_idx].copy()
    f_best = fitness[g_idx]

    # Adaptive means for mutation factor F and crossover rate CR (JADE parameters).
    mu_F = 0.5
    mu_CR = 0.5
    archive = []  # replaced parents; collected but not re-sampled here

    # Store previous best for minstep comparison
    prev_g_best = g_best.copy()

    # Stagnation counter
    stagnation_count = 0

    for gen in range(1, n_generations + 1):
        f_prev_gen = f_best

        new_pop = np.zeros_like(pop)
        new_fitness = np.zeros(pop_size)
        new_F_list = []
        new_CR_list = []

        for i in range(pop_size):
            # Adaptive F (Cauchy-perturbed) and CR (Gaussian-perturbed), clipped to [0, 1]
            F = np.random.standard_cauchy() * 0.1 + mu_F
            F = np.clip(F, 0, 1)
            CR = np.random.normal(mu_CR, 0.1)
            CR = np.clip(CR, 0, 1)

            # p-best selection: random pick among the top-p fraction (at least 2)
            p_num = max(2, int(np.ceil(p * pop_size)))
            pbest_idx = np.random.choice(np.argsort(fitness)[:p_num])
            x_pbest = pop[pbest_idx]

            # Mutation: DE/current-to-pbest/1 with two distinct random donors
            idxs = [idx for idx in range(pop_size) if idx != i]
            r1, r2 = np.random.choice(idxs, 2, replace=False)
            x_mut = pop[i] + F * (x_pbest - pop[i]) + F * (pop[r1] - pop[r2])
            x_mut = np.clip(x_mut, lb, ub)

            # Binomial crossover; jrand guarantees at least one mutant component
            jrand = np.random.randint(D)
            trial = np.array([
                x_mut[j] if np.random.rand() < CR or j == jrand
                else pop[i][j] for j in range(D)
            ])
            trial = np.clip(trial, lb, ub)

            f_trial = func(trial, *args, **kwargs)
            if is_feasible(trial) and f_trial < fitness[i]:
                # Greedy selection: trial replaces parent; parent goes to archive
                new_pop[i] = trial
                new_fitness[i] = f_trial
                archive.append(pop[i].copy())
                new_F_list.append(F)
                new_CR_list.append(CR)
            else:
                new_pop[i] = pop[i]
                new_fitness[i] = fitness[i]

        # Update population
        pop = new_pop
        fitness = new_fitness

        # Update global best among currently feasible members
        feasible_idx = [idx for idx in range(pop_size) if is_feasible(pop[idx])]
        if feasible_idx:
            idx_best = feasible_idx[np.argmin(fitness[feasible_idx])]
            if fitness[idx_best] < f_best:
                # Check minstep only when g_best improves
                step_size = np.linalg.norm(pop[idx_best] - prev_g_best)
                if step_size <= minstep:
                    # Improvement too small in position space: accept and stop.
                    if debug:
                        print(f"Stopping: Position change < {minstep} at generation {gen}")
                    g_best = pop[idx_best].copy()
                    f_best = fitness[idx_best]
                    break
                g_best = pop[idx_best].copy()
                f_best = fitness[idx_best]
                prev_g_best = g_best.copy()

        # Adapt F and CR toward the means of successful values (arithmetic mean;
        # the JADE paper uses a Lehmer mean for F)
        if new_F_list:
            mu_F = (1 - c) * mu_F + c * np.mean(new_F_list)
            mu_CR = (1 - c) * mu_CR + c * np.mean(new_CR_list)

        # Periodic local refinement of the incumbent best
        if local_opt_every is not None and gen % local_opt_every == 0:
            if local_optimizer is not None:
                res = local_optimizer(func, g_best, lb, ub)
            elif callable(func_and_grad):
                res = minimize(func_and_grad, g_best, args=args, method="L-BFGS-B",
                               jac=True, bounds=np.stack((lb, ub), axis=1),
                               options={"maxiter": 50})
            elif callable(grad_func):
                def _fg(x): return func(x), grad_func(x)
                res = minimize(_fg, g_best, args=args, method="L-BFGS-B",
                               jac=True, bounds=np.stack((lb, ub), axis=1),
                               options={"maxiter": 50})
            else:
                # Gradient-free refinement; scipy picks the method from bounds.
                res = minimize(func, g_best, args=args,
                               bounds=np.stack((lb, ub), axis=1),
                               options={"maxiter":50})
            if is_feasible(res.x) and res.fun < f_best:
                g_best = res.x.copy()
                f_best = res.fun
                prev_g_best = g_best.copy()
            if debug:
                print(f"Local refinement at gen {gen}: nit={res.nit}, nfev={res.nfev}, "
                      f"f={res.fun:.6e}, success={res.success}, message={res.message}")

        # Debug print
        if debug:
            print(f"Gen {gen}: best f={f_best}")

        # Check stagnation stopping criterion
        if f_best < f_prev_gen:
            stagnation_count = 0
        else:
            stagnation_count += 1

        if stagnation_count >= stagnation_limit:
            if debug:
                print(f"Stopping: No improvement for {stagnation_limit} generations at gen {gen}")
            break

    return g_best, f_best
def pso(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={},
        pop_size=100, omega=0.5, phip=0.5, phig=0.5, n_generations=100,
        minstep=1e-6, minfunc=1e-6, debug=False, seed=42,
        local_opt_every=15, initial_positions=None, local_optimizer=None,
        func_and_grad=None, grad_func=None):
    """
    Particle Swarm Optimization with periodic local refinement
    R. C. Eberhart, Y. Shi and J. Kennedy, Swarm Intelligence, CA, San Mateo:Morgan Kaufmann, 2001.
    https://theswissbay.ch/pdf/Gentoomen%20Library/Artificial%20Intelligence/Swarm%20Intelligence/Swarm%20intelligence%20-%20James%20Kennedy.pdf

    Parameters:
    -----------
    func : callable
        Objective function to minimize
    lb : array_like
        Lower bounds for variables
    ub : array_like
        Upper bounds for variables
    ieqcons : list, optional
        List of inequality constraint functions
    f_ieqcons : callable, optional
        Function returning array of inequality constraints
    args : tuple, optional
        Extra arguments passed to objective function
    kwargs : dict, optional
        Extra keyword arguments passed to objective function
    pop_size : int, optional
        Number of particles in swarm
    omega : float, optional
        Inertia weight
    phip : float, optional
        Personal best weight
    phig : float, optional
        Global best weight
    n_generations : int, optional
        Maximum number of iterations
    minstep : float, optional
        Minimum step size for convergence
    minfunc : float, optional
        Minimum function improvement for convergence
    debug : bool, optional
        Whether to print debug information
    seed : int, optional
        Random seed for reproducibility
    local_opt_every : int, optional
        Frequency of local optimization (every N iterations); None disables it
    initial_positions : array-like, optional
        Positions used to seed the first particles
    local_optimizer : callable, optional
        Custom local optimizer called as local_optimizer(func, g, lb, ub)
    func_and_grad : callable, optional
        Joint objective/gradient function used with jac=True in refinement
    grad_func : callable, optional
        Separate gradient function combined with func for refinement

    Returns:
    --------
    best_position : ndarray
        Best position found (None if no feasible particle was ever seen)
    best_value : float
        Best function value found
    """
    # Input validation
    lb = np.asarray(lb)
    ub = np.asarray(ub)
    D = len(lb)

    assert len(lb) == len(
        ub), "Lower- and upper-bounds must be the same length"
    assert np.all(
        ub > lb), "All upper-bound values must be greater than lower-bound values"
    assert callable(func), "Invalid function handle"

    # Set random seed
    # NOTE(review): seeds the *global* numpy RNG — affects other numpy users.
    np.random.seed(seed)

    # Velocity bounds: one full box-width per step in each dimension
    vhigh = np.abs(ub - lb)
    vlow = -vhigh

    # Constraint setup: convention is cons(x) >= 0 means feasible
    if f_ieqcons is not None:
        def cons(x):
            return np.asarray(f_ieqcons(x, *args, **kwargs))
    elif ieqcons:
        def cons(x):
            return np.asarray([c(x, *args, **kwargs) for c in ieqcons])
    else:
        def cons(x):
            return np.array([0.0])

    def is_feasible(x):
        return np.all(cons(x) >= 0)

    # Initialize swarm using a scrambled Sobol sequence (low-discrepancy)
    sampler = qmc.Sobol(d=D, scramble=True, seed=seed)
    sobol_sample = sampler.random_base2(m=int(np.ceil(np.log2(pop_size))))
    x = qmc.scale(sobol_sample[:pop_size], lb, ub)
    if initial_positions is not None:
        # User-supplied warm starts overwrite the first n_init Sobol points
        n_init = min(len(initial_positions), pop_size)
        x[:n_init] = initial_positions[:n_init]
    # Initialize velocities and personal bests
    v = vlow + np.random.rand(pop_size, D) * (vhigh - vlow)
    p = np.copy(x)
    fp = np.array([func(p[i], *args, **kwargs) for i in range(pop_size)])
    feasibles = np.array([is_feasible(p[i]) for i in range(pop_size)])

    # Initialize global best
    # NOTE(review): if no initial particle is feasible, g stays None and the
    # velocity update below (g - x) will fail — confirm constrained use cases.
    fg = np.inf
    g = None

    for i in range(pop_size):
        if feasibles[i] and fp[i] < fg:
            fg = fp[i]
            g = p[i].copy()

    # Main PSO loop
    it = 1
    while it <= n_generations:
        # Update velocities and positions (standard inertia + cognitive + social terms)
        rp = np.random.rand(pop_size, D)
        rg = np.random.rand(pop_size, D)
        v = omega * v + phip * rp * (p - x) + phig * rg * (g - x)
        x = np.clip(x + v, lb, ub)

        # Evaluate particles and update personal/global bests
        for i in range(pop_size):
            fx = func(x[i], *args, **kwargs)

            if fx < fp[i] and is_feasible(x[i]):
                p[i] = x[i].copy()
                fp[i] = fx

                if fx < fg:
                    # Distance from the previous global best (before updating g)
                    stepsize = np.linalg.norm(g - x[i])

                    if debug:
                        print(
                            f'New best for swarm at iteration {it}: {x[i]} {fx}')

                    # Check convergence criteria
                    # NOTE(review): these prints fire regardless of `debug` — confirm intended.
                    if np.abs(fg - fx) <= minfunc:
                        print(f'Stopping: Objective improvement < {minfunc}')
                        return x[i], fx
                    if stepsize <= minstep:
                        print(f'Stopping: Position change < {minstep}')
                        return x[i], fx

                    g = x[i].copy()
                    fg = fx

        # Periodic local refinement of the incumbent best
        if local_opt_every is None:
            pass
        elif it % local_opt_every == 0:
            if local_optimizer is not None:
                local_res = local_optimizer(func, g, lb, ub)
            elif callable(func_and_grad):
                bounds = list(zip(lb, ub))
                local_res = minimize(func_and_grad, g, args=args, method="L-BFGS-B",
                                     jac=True, bounds=bounds,
                                     options={"maxiter": 20})
                # Ensure the flag exists for should_accept_local_result
                if not hasattr(local_res, 'recovered_from_abnormal'):
                    local_res.recovered_from_abnormal = False
            elif callable(grad_func):
                bounds = list(zip(lb, ub))
                def _fg(x): return func(x), grad_func(x)
                local_res = minimize(_fg, g, args=args, method="L-BFGS-B",
                                     jac=True, bounds=bounds,
                                     options={"maxiter": 20})
                # Ensure the flag exists for should_accept_local_result
                if not hasattr(local_res, 'recovered_from_abnormal'):
                    local_res.recovered_from_abnormal = False
            else:
                local_res = robust_local_optimization(
                    func, g, args=args, lb=lb, ub=ub, debug=False
                )

            if should_accept_local_result(local_res, fg, is_feasible, debug):
                if debug:
                    print(
                        "Gradient refinement improved")
                g = local_res.x.copy()
                fg = local_res.fun

        if debug:
            print(f'Best after iteration {it}: {g} {fg}')
        it += 1

    # Final checks
    print(f'Stopping: maximum iterations reached --> {n_generations}')
    if g is not None and not is_feasible(g):
        print("Warning: Optimization finished without a feasible solution.")

    return g, fg
def ecl_acquisition(mu_N, var_N, threshold=0.0):
    """
    Entropy Contour Learning (ECL) acquisition function.

    Implements the ECL acquisition function from equation (8):
    ECL(x | S_N, g) = -P(g(Y(x)) > 0) log P(g(Y(x)) > 0) - P(g(Y(x)) <= 0) log P(g(Y(x)) <= 0)

    where g is the affine limit state function g(Y(x)) = Y(x) - T and the
    failure region is defined by g(Y(x)) <= 0. This is the binary entropy of
    the exceedance probability, maximal where the predictive mean crosses the
    threshold relative to the predictive uncertainty.

    Parameters
    ----------
    mu_N : float or array-like, shape (n_points,)
        Posterior mean(s) of the GP at the evaluation point(s).
    var_N : float or array-like, shape (n_points,)
        Posterior variance(s) of the GP at the evaluation point(s).
    threshold : float, default=0.0
        Threshold T for the affine limit state function g(Y(x)) = Y(x) - T.

    Returns
    -------
    ecl_values : float or ndarray, shape (n_points,)
        ECL values (higher values indicate more informative points). A scalar
        is returned when a single point is evaluated.

    Examples
    --------
    >>> ecl_vals = ecl_acquisition(mu, var, threshold=0.0)
    >>> next_point_idx = np.argmax(ecl_vals)
    """
    # Coerce to arrays so plain Python floats/lists are accepted
    # (previously a float mu_N crashed on the .ndim access below).
    mu_N = np.asarray(mu_N, dtype=float)
    var_N = np.asarray(var_N, dtype=float)

    # Convert variance to standard deviation; floor variance for positivity
    sigma_N = np.sqrt(np.maximum(var_N, 1e-15))

    # Promote 0-d (scalar) inputs to 1-element arrays
    if mu_N.ndim == 0:
        mu_N = np.array([mu_N])
    if sigma_N.ndim == 0:
        sigma_N = np.array([sigma_N])

    # Standardized values: (mu_N(x) - T) / sigma_N(x)
    sigma_N = np.maximum(sigma_N, 1e-10)  # numerical stability
    z = (mu_N - threshold) / sigma_N

    # P(g(Y(x)) > 0) = P(Y(x) > T) = 1 - Phi(z)
    p_safe = 1 - norm.cdf(z)

    # Binary entropy H(p) = -p log p - (1-p) log(1-p); clip away exact 0/1
    p_safe = np.clip(p_safe, 1e-15, 1 - 1e-15)
    ecl_values = -p_safe * np.log(p_safe) - (1 - p_safe) * np.log(1 - p_safe)

    # Return scalar if single point, array otherwise
    return ecl_values[0] if len(ecl_values) == 1 else ecl_values
def ecl_batch_acquisition(gp_model, X, rays_predict, params, threshold=0.0, batch_size=1):
    """
    Batch ECL acquisition function that selects multiple points simultaneously.

    Uses a greedy approach to select the next batch_size points that maximize
    the ECL criterion while maintaining diversity.

    Parameters:
    -----------
    gp_model : gddegp
        Trained gddegp model instance
    X : array-like, shape (n_candidates, n_features)
        Candidate points to select from
    rays_predict : ndarray
        Ray directions for prediction
    params : ndarray
        Hyperparameters for gddegp prediction
    threshold : float, default=0.0
        Threshold for the limit state function
    batch_size : int, default=1
        Number of points to select in the batch

    Returns:
    --------
    selected_indices : array-like, shape (batch_size,)
        Indices of selected points from X
    selected_points : array-like, shape (batch_size, n_features)
        The selected points

    Examples:
    ---------
    >>> indices, points = ecl_batch_acquisition(gp_model, candidates, rays, params, batch_size=3)
    >>> next_experiments = points
    """
    X = np.atleast_2d(X)
    n_candidates = X.shape[0]

    # If the batch covers all candidates, return everything unchanged
    if batch_size >= n_candidates:
        return np.arange(n_candidates), X

    selected_indices = []
    remaining_indices = list(range(n_candidates))

    # Greedy selection: the model is never updated between picks, so this
    # effectively selects the top-batch_size candidates by ECL value.
    for _ in range(batch_size):
        if not remaining_indices:
            break

        # Evaluate ECL for remaining candidates
        # NOTE(review): ecl_acquisition in this module takes (mu_N, var_N,
        # threshold); this 5-argument call will raise TypeError as written —
        # confirm whether a GP-predicting wrapper was intended here.
        remaining_X = X[remaining_indices]
        ecl_values = ecl_acquisition(
            gp_model, remaining_X, rays_predict, params, threshold)

        # Select point with highest ECL value
        best_local_idx = np.argmax(ecl_values)
        best_global_idx = remaining_indices[best_local_idx]

        selected_indices.append(best_global_idx)
        remaining_indices.remove(best_global_idx)

    selected_indices = np.array(selected_indices)
    selected_points = X[selected_indices]

    return selected_indices, selected_points
def get_surrogate_gradient_ray(gp, x, params, fallback_axis=0, normalize=True, threshold=0.0):
    """
    Compute a surrogate gradient direction (d x 1 column vector) at x from the
    GP model, flipping its sign when the predicted mean exceeds the threshold.

    Parameters
    ----------
    gp : object
        Trained GP model instance exposing .predict (arbitrary input dim).
    x : array-like, shape (1, d)
        Location at which to evaluate the surrogate gradient.
    params : array-like
        GP hyperparameters.
    fallback_axis : int, default=0
        Axis of the unit vector returned when the gradient norm is ~zero
        (only used when normalize=True).
    normalize : bool, default=True
        Return a unit vector if True, the raw (signed) gradient otherwise.
    threshold : float, default=0.0
        Mean value above which the gradient sign is flipped.

    Returns
    -------
    ray : ndarray, shape (d, 1)
        The chosen direction as a column vector.
    """
    x = np.atleast_2d(x)
    dim = x.shape[1]
    grad = np.zeros((dim, 1))

    # Directional derivative along each standard basis vector builds the gradient.
    for axis in range(dim):
        direction = np.zeros((dim, 1))
        direction[axis, 0] = 1.0
        prediction = gp.predict(x, direction, params, calc_cov=False, return_deriv=True)
        # prediction[0] is the mean, prediction[1] the first derivative.
        grad[axis, 0] = prediction[1]

    # Mean prediction at x; the probe direction is irrelevant here.
    probe = np.zeros((dim, 1))
    probe[0, 0] = 1.0
    mean_value = gp.predict(x, probe, params, calc_cov=False,
                            return_deriv=False)[0]

    # Point downhill (toward the threshold) when the mean is above it.
    if mean_value > threshold:
        grad = -grad

    if not normalize:
        return grad.reshape(-1, 1)

    grad_norm = np.linalg.norm(grad)
    if grad_norm < 1e-16:
        # Degenerate gradient: fall back to a fixed coordinate direction.
        fallback = np.zeros((dim, 1))
        fallback[fallback_axis, 0] = 1.0
        return fallback.reshape(-1, 1)
    return (grad / grad_norm).reshape(-1, 1)
def finite_difference_gradient(gp, x, params, h=1e-5):
    """
    Central finite-difference estimate of the GP mean gradient at x.

    Parameters
    ----------
    gp : object
        Trained GP model instance exposing .predict.
    x : ndarray, shape (1, d)
        Point at which to approximate the gradient.
    params : array-like
        GP hyperparameters.
    h : float
        Finite-difference step size.

    Returns
    -------
    grad_fd : ndarray, shape (d, 1)
        Central finite-difference gradient estimate.
    """
    x = np.atleast_2d(x)
    dim = x.shape[1]
    fd_grad = np.zeros((dim, 1))

    # Any probe ray works since only the mean is requested (return_deriv=False).
    probe_ray = np.zeros((dim, 1))
    probe_ray[0, 0] = 1.0

    for axis in range(dim):
        shift = np.zeros_like(x)
        shift[0, axis] = h
        forward = gp.predict(x + shift, probe_ray, params,
                             calc_cov=False, return_deriv=False)
        backward = gp.predict(x - shift, probe_ray, params,
                              calc_cov=False, return_deriv=False)
        fd_grad[axis, 0] = (forward[0] - backward[0]) / (2 * h)
    return fd_grad
def check_gp_gradient(gp, x, params, h=1e-5, fallback_axis=0):
    """
    Diagnostic: compare the GP-predicted gradient against a central
    finite-difference estimate at x, printing absolute and relative errors.
    Returns both gradients as (d, 1) column vectors.
    """
    # Unnormalized surrogate gradient (sign convention applied by the helper)
    predicted = get_surrogate_gradient_ray(
        gp, x, params, fallback_axis=fallback_axis, normalize=False)
    # Numerical reference gradient
    approximated = finite_difference_gradient(gp, x, params, h=h)

    pred_flat = predicted.flatten()
    approx_flat = approximated.flatten()
    abs_error = np.abs(pred_flat - approx_flat)

    print("Surrogate GP gradient:\n", pred_flat)
    print("Finite-difference gradient:\n", approx_flat)
    print("Absolute error:\n", abs_error)
    # Tiny offset avoids division by zero on vanishing reference components.
    print("Relative error:\n", abs_error / (np.abs(approx_flat) + 1e-15))

    return predicted, approximated
def get_entropy_ridge_direction_nd(gp, x, params, threshold=0.0, h=1e-5,
                                   fallback_axis=0, normalize=True, random_dir=False, seed=None):
    """
    Get a direction tangent to the entropy level set ("ridge direction") at x.

    The entropy field is the ECL acquisition value of the GP prediction; its
    finite-difference gradient defines the level-set normal, and the returned
    direction lies in the orthogonal complement (the tangent space).

    Parameters
    ----------
    gp : object
        Trained GP model instance with .predict
    x : array-like, shape (1, d)
        The input location
    params : array-like
        GP hyperparameters
    threshold : float
        ECL threshold
    h : float
        Finite difference step
    fallback_axis : int
        Axis for fallback if gradient is zero (currently unused; kept for
        interface compatibility)
    normalize : bool
        Normalize output direction
    random_dir : bool
        If True, return a random direction in the ridge (level set). If False,
        return the first tangent basis vector (deterministic).
    seed : int or None
        For reproducible random direction

    Returns
    -------
    ridge_dir : ndarray, shape (d, 1)
        Ridge direction (tangent to the entropy level set) at x.
    """
    # BUGFIX: removed `from utils import ecl_acquisition` — this module IS
    # jetgp.utils, so that import targeted a nonexistent top-level `utils`
    # module and raised ModuleNotFoundError; ecl_acquisition is defined here.
    x = np.atleast_2d(x)
    d = x.shape[1]

    def entropy_func(x0):
        """Binary entropy (ECL) of the GP prediction at a single point."""
        x0 = np.atleast_2d(x0)
        ray0 = np.zeros((d, 1))
        ray0[0, 0] = 1.0  # probe ray; mean/variance do not depend on it here
        mu, var = gp.predict(
            x0, ray0, params, calc_cov=True, return_deriv=False)
        return float(ecl_acquisition(mu, var, threshold=threshold))

    # Central finite-difference gradient of the entropy field at x.
    grad_H = np.zeros((d, 1))
    for i in range(d):
        dx = np.zeros_like(x)
        dx[0, i] = h
        grad_H[i, 0] = (entropy_func(x + dx) - entropy_func(x - dx)) / (2 * h)

    # Orthonormal basis for the tangent space of the level set: null_space of
    # the 1 x d normal returns a (d, d-1) matrix, one basis vector per column.
    basis = null_space(grad_H.T)

    if random_dir:
        # Random combination of tangent basis vectors for a stochastic probe.
        rng = np.random.default_rng(seed)
        coeffs = rng.standard_normal(basis.shape[1])
        ridge_dir = basis @ coeffs
        if normalize:
            ridge_dir = ridge_dir / np.linalg.norm(ridge_dir)
        ridge_dir = ridge_dir.reshape(-1, 1)
    else:
        # Deterministic choice: first tangent basis vector.
        ridge_dir = basis[:, 0]
        if normalize:
            ridge_dir = ridge_dir / np.linalg.norm(ridge_dir)
        ridge_dir = ridge_dir.reshape(-1, 1)

    return ridge_dir
def get_entropy_ridge_direction_nd_2(gp, x, params, threshold=0.0, h=1e-5,
                                     fallback_axis=0, normalize=True, random_dir=False, seed=None):
    """
    Select the flattest direction tangent to the entropy level set at x.

    Builds the tangent space to the level set (null space of the surrogate
    gradient) and returns the basis direction along which the GP mean has the
    smallest absolute second directional derivative.

    Parameters
    ----------
    gp : object
        Trained GP model instance with .predict and .n_order
    x : array-like, shape (1, d)
        The input location
    params : array-like
        GP hyperparameters
    threshold : float
        ECL threshold, forwarded to the gradient computation
    h : float
        Finite difference step for the second-derivative estimate
    fallback_axis : int
        Axis for fallback if gradient is zero (forwarded)
    normalize : bool
        Normalize the gradient direction (forwarded)
    random_dir : bool
        Unused; kept for interface compatibility with the `_nd` variant.
    seed : int or None
        Unused; kept for interface compatibility with the `_nd` variant.

    Returns
    -------
    best_dir : ndarray of shape (d, 1) or None
        Tangent direction with minimal |second directional derivative| of the
        GP mean; None when the tangent space is empty.
    """
    # Gradient of the surrogate at x. Previously the caller's options were
    # ignored and 0/True/0.0 were hard-coded; forward them instead.
    grad_H = get_surrogate_gradient_ray(
        gp, x, params, fallback_axis=fallback_axis, normalize=normalize,
        threshold=threshold)

    # Orthonormal basis for the tangent space to the level set:
    # null_space returns a (d, d-1) matrix, one basis vector per column.
    basis = null_space(grad_H.T)

    best_curv = np.inf
    best_dir = None
    for idx in range(basis.shape[1]):
        v = basis[:, idx].reshape(-1, 1)
        if gp.n_order >= 2:
            # GP exposes second directional derivatives directly.
            mu = gp.predict(x, v, params, calc_cov=False, return_deriv=True)
            second_deriv = mu[2]
        else:
            # Central finite difference of the GP's first directional
            # derivative along v. (The prediction at x itself is not needed
            # in this branch, so it is no longer computed.)
            mu_fwd = gp.predict(
                x + h * v.T, v, params, calc_cov=False, return_deriv=True)
            mu_bwd = gp.predict(
                x - h * v.T, v, params, calc_cov=False, return_deriv=True)
            second_deriv = (mu_fwd[1] - mu_bwd[1]) / (2 * h)
        if abs(second_deriv) < best_curv:
            best_curv = abs(second_deriv)
            best_dir = v

    return best_dir
1997def sobol_points(n_points, box, seed=0):
1998 d = len(box)
1999 sampler = qmc.Sobol(d=d, scramble=True, seed=seed)
2000 m = int(np.ceil(np.log2(n_points)))
2001 pts = sampler.random_base2(m=m)[:n_points]
2002 for j, (lo, hi) in enumerate(box):
2003 pts[:, j] = lo + (hi - lo) * pts[:, j]
2004 return pts
def local_box_around_point(x_next, delta):
    """Axis-aligned box of half-width ``delta`` centered at ``x_next``.

    Parameters
    ----------
    x_next : array-like, shape (d,) or (1, d)
        Center point.
    delta : float
        Half-width applied uniformly to every dimension.

    Returns
    -------
    list of (lo, hi) float tuples, one per dimension.
    """
    center = np.asarray(x_next).reshape(-1)
    return [(float(c - delta), float(c + delta)) for c in center]
2014# @profile
2015# def maximize_ier_direction(
2016# gp, x_next, x_train, y_blocks, rays_array, params, box, threshold=0.0, n_integration=500, seed=123, delta=.5
2017# ):
2018# """
2019# Maximizes Integrated Entropy Reduction (IER) at x_next over directions v (unit norm).
2020# """
2021# d = x_next.shape[1]
2022# # 1. Integration points for Monte Carlo estimate
2023# local_box = local_box_around_point(x_next, delta)
2024# integration_points = sobol_points(n_integration, local_box, seed)
2025# # Use a default ray for integration points (e.g., all along first axis)
2026# ray0 = np.zeros((d, 1))
2027# ray0[0, 0] = 1.0
2028# integration_rays = np.tile(ray0, n_integration)
2030# # 2. Get current mean/variance at integration points
2031# mu_before, var_before = gp.predict(
2032# integration_points, integration_rays, params, calc_cov=True, return_deriv=False
2033# )
2034# from utils import ecl_acquisition
2035# ecl_before = ecl_acquisition(mu_before, var_before, threshold=threshold)
2037# @profile
2038# def negative_ier(w):
2039# v = w.reshape(-1, 1)
2040# norm_v = np.linalg.norm(v)
2041# if norm_v < 1e-12:
2042# return 1e6
2043# v = v / norm_v
2045# Y_blocks = [y.copy() for y in y_blocks]
2046# # --- Normalize if needed ---
2047# v_ = v
2048# integration_rays_ = integration_rays
2049# # utils.check_gp_gradient(gp, x_next, previous_params)
2050# # Hypercomplex construction for new point
2052# # Compute GP predictions (function value and derivatives)
2053# y_pred = gp.predict(x_next, v_, params,
2054# calc_cov=False, return_deriv=True).ravel()
2056# # How many derivatives do you want to include?
2057# n_order = gp.n_order
2059# # Rebuild y_blocks_next to include the GP predictions
2060# Y_blocks_next = []
2062# # Append the function prediction
2063# Y_blocks_next.append(y_pred[0].reshape(-1, 1))
2065# # Append derivatives in order (1st, 2nd, ..., n_order)
2066# for i in range(1, n_order + 1):
2067# Y_blocks_next.append(y_pred[i].reshape(-1, 1))
2069# # Augment training set
2070# X_train = np.vstack([x_train, x_next])
2071# for k in range(len(y_blocks)):
2072# Y_blocks[k] = np.vstack([Y_blocks[k], Y_blocks_next[k]])
2073# rays_array_tmp = np.concatenate([rays_array, v_], axis=1)
2075# gp_temp = gddegp.gddegp(X_train, Y_blocks,
2076# n_order=gp.n_order,
2077# rays_array=rays_array_tmp,
2078# normalize=True,
2079# kernel="SE",
2080# kernel_type="anisotropic",)
2082# mu, var_after = gp_temp.predict(
2083# integration_points, integration_rays_, params, calc_cov=True, return_deriv=False)
2084# ecl_after = ecl_acquisition(
2085# mu, var_after, threshold=threshold)
2086# ier = np.mean(ecl_before - ecl_after)
2087# return -ier
2088# # 4. Optimize using L-BFGS-B, with a couple random restarts for robustness
2090# D = d # dimension of direction
2091# lb = np.full(D, -2.0)
2092# ub = np.full(D, 2.0)
2093# # Optionally, can try [-1, 1] or any sufficiently wide box (PSO will normalize inside objective)
2095# best_w, best_val = pso(
2096# negative_ier, lb, ub,
2097# pop_size=10*d, maxiter=11, debug=True, seed=seed + 77
2098# )
2099# v_opt = best_w.reshape(-1, 1)
2100# v_opt /= np.linalg.norm(v_opt)
2102# return v_opt
2105# utils.py
2108# UPDATED function to use the new bounds logic
2109# def find_next_point(
2110# gp,
2111# params,
2112# x_train,
2113# y_train_list,
2114# dist_params,
2115# acquisition_func,
2116# integration_points = None,
2117# n_candidate_points: int = 1024,
2118# n_local_starts: int = 10,
2119# use_agg_al = False,
2120# var_hist = [],
2121# **acq_kwargs
2122# ):
2123# """Finds the next point using distribution-aware optimization bounds."""
2125# if not use_agg_al:
2126# # --- Stage 1: Coarse Global Search ---
2127# shape, scale = get_pdf_params(dist_params)
2129# sampler = Sobol(d=x_train.shape[1], scramble=True)
2130# coarse_points_unit = sampler.random(n=n_candidate_points)
2131# candidate_points = get_inverse(dist_params, coarse_points_unit
2132# )
2134# candidate_scores = acquisition_func(
2135# gp, params, x_train, y_train_list,
2136# candidate_points, integration_points = integration_points, normalize=gp.normalize
2137# )
2139# top_indices = np.argsort(candidate_scores.ravel())[-n_local_starts:]
2140# top_starting_points = candidate_points[top_indices]
2142# # --- Stage 2: Local Optimization ---
2144# # Get the correct bounds for the optimizer
2145# bounds = get_optimization_bounds(dist_params)
2146# all_unbounded = all(b[0] is None for b in bounds)
2148# def objective_function(x: np.ndarray) -> float:
2149# x_candidate = x.reshape(1, -1)
2150# score = acquisition_func(
2151# gp, params, x_train, y_train_list,
2152# x_candidate, integration_points = integration_points, normalize=gp.normalize
2153# )
2154# return -score[0]
2156# best_x = None
2157# best_score = -np.inf
2159# for start_point in top_starting_points:
2160# if all_unbounded:
2161# res = minimize(fun=objective_function, x0=start_point, method="BFGS")
2162# else:
2163# res = minimize(fun=objective_function, x0=start_point, method="L-BFGS-B", bounds=bounds)
2165# if -res.fun > best_score:
2166# best_score = -res.fun
2167# best_x = res.x
2169# return best_x.reshape(1, -1), best_score
2170# else:
2171# agg_variance = acquisition_func(var_hist)
2172# max_index = np.argmax(agg_variance)
2173# return integration_points[max_index,:], agg_variance[0,max_index]
2175def _ensure_2d(x):
2176 x = np.asarray(x)
2177 if x.ndim == 1:
2178 return x.reshape(1, -1)
2179 return x
2181def _format_y_to_add(y_pred):
2182 """Convert gp.predict output at one x to list of 2D arrays for y_train_list append."""
2183 if isinstance(y_pred, (list, tuple)):
2184 return [np.atleast_2d(np.asarray(e)) for e in y_pred]
2185 else:
2186 return [np.atleast_2d(np.asarray(y_pred))]
2189# def sample_from_dist(dist, means, var, shape, scale, size):
2190# num_cols = len(dist)
2191# num_rows = int(size)
2192# samples = np.zeros((num_rows, num_cols))
2193# for i in range(0, num_cols):
2194# if dist[i] == 'N': # normal (Gaussian)
2195# samples[:, i] = stats.norm.rvs(
2196# loc=shape[i], scale=scale[i], size=size)
2197# elif dist[i] == 'U': # uniform
2198# samples[:, i] = stats.uniform.rvs(
2199# loc=shape[i], scale=scale[i], size=size)
2200# elif dist[i] == 'LN': # log-normal
2201# samples[:, i] = stats.lognorm.rvs(
2202# scale[i], scale=np.exp(shape[i]), size=size)
2203# # elif dist[i] == 'B': # beta
2204# # frozen_dist = stats.beta(a=shape[i], b=scale[i])
2205# # transformed_samples[:, i] = frozen_dist.ppf(samples[:, i])
2206# elif dist[i] == 'T': # triangle (symmetric)
2207# c = .5
2208# samples[:, i] = stats.triang.rvs(
2209# c, loc=shape[i], scale=scale[i], size=size)
2210# elif dist[i] == 'UTN':
2211# # samples[:, i] = stats.uniform.rvs(
2212# # loc=shape[i], scale=scale[i] - shape[i], size=size)
2213# cf = 3
2214# # lb = shape[i] + cf*shape[i]
2215# # ub = scale[i] - cf*shape[i]
2216# # transformed_samples[:, i] = (
2217# # ub - lb)*samples[:, i] + lb
2218# a, b = (shape[i] - means[i]) / \
2219# (cf*var[i]*means[i]), (scale[i] -
2220# means[i]) / (cf*var[i]*means[i])
2221# samples[:, i] = stats.truncnorm.rvs(
2222# a, b, loc=means[i], scale=cf*var[i]*means[i], size=size)
2224# return samples
def get_pdf_params(dist_params):
    """
    Computes scipy-specific loc/scale parameters. Prioritizes explicit bounds if provided.

    Parameters
    ----------
    dist_params : dict
        Must contain 'dists' (list of distribution codes) plus 'means' and
        'vars'; may also carry 'lower_bounds' / 'upper_bounds'.

    Returns
    -------
    (rv_a, rv_b) : tuple of ndarray
        Per-dimension loc and scale arrays. Entries for codes without a
        handled fallback remain zero.
    """
    dists = dist_params['dists']
    n = len(dists)
    rv_a = np.zeros(n)  # loc
    rv_b = np.zeros(n)  # scale

    explicit = 'lower_bounds' in dist_params and 'upper_bounds' in dist_params

    for i, name in enumerate(dists):
        # Explicit bounds take priority for the bounded distributions.
        if explicit and name in ('U', 'TN'):
            lo = dist_params['lower_bounds'][i]
            hi = dist_params['upper_bounds'][i]
            if name == 'U':
                rv_a[i], rv_b[i] = lo, hi - lo  # loc = lower bound, scale = width
            else:  # 'TN'
                rv_a[i] = dist_params['means'][i]
                rv_b[i] = np.sqrt(dist_params['vars'][i])
        else:
            # Fall back to moment-matched parameters.
            stdev = np.sqrt(dist_params['vars'][i])
            mean = dist_params['means'][i]
            if name == 'U':
                # Uniform with the given mean/variance has width 2*sqrt(3)*stdev.
                rv_a[i] = mean - np.sqrt(3) * stdev
                rv_b[i] = 2 * np.sqrt(3) * stdev
            elif name == 'N':
                rv_a[i], rv_b[i] = mean, stdev
            # (Add other fallbacks here if needed)

    return rv_a, rv_b
def get_optimization_bounds(dist_params):
    """
    Determines optimization bounds. Prioritizes explicit bounds if provided.

    Parameters
    ----------
    dist_params : dict
        Must contain 'dists' plus 'means' and 'vars'; may also carry
        'lower_bounds' / 'upper_bounds'.

    Returns
    -------
    list of (lo, hi) tuples
        One pair per dimension; (None, None) marks an unbounded dimension.
    """
    explicit = 'lower_bounds' in dist_params and 'upper_bounds' in dist_params
    result = []

    for i, name in enumerate(dist_params['dists']):
        # Explicit bounds win for the bounded distributions.
        if explicit and name in ('U', 'TN', 'B'):
            result.append((dist_params['lower_bounds'][i],
                           dist_params['upper_bounds'][i]))
            continue

        if name == 'B':
            # Beta distribution lives on [0, 1] by default.
            result.append((0, 1))
        elif name in ('N', 'LN'):
            # Unbounded support.
            result.append((None, None))
        elif name == 'U':
            stdev = np.sqrt(dist_params['vars'][i])
            mean = dist_params['means'][i]
            result.append((mean - np.sqrt(3) * stdev,
                           mean + np.sqrt(3) * stdev))
        elif name == 'TN':
            stdev = np.sqrt(dist_params['vars'][i])
            mean = dist_params['means'][i]
            result.append((mean - 3 * stdev, mean + 3 * stdev))
        else:
            result.append((None, None))

    return result
def get_inverse(dist_params, samples):
    """
    Transforms uniform samples to a specified distribution via inverse CDF.

    Parameters
    ----------
    dist_params : dict
        'dists' codes ('TN', 'U', 'N') plus 'means'/'vars' and optional
        'lower_bounds'/'upper_bounds'.
    samples : ndarray, shape (n, d)
        Samples in [0, 1]^d, one column per dimension.

    Returns
    -------
    ndarray, shape (n, d)
        Samples mapped through each dimension's inverse CDF.

    Raises
    ------
    ValueError
        If a distribution code is not one of the supported ones.
    """
    dists = dist_params['dists']
    loc_params, scale_params = get_pdf_params(dist_params)
    has_bounds = 'lower_bounds' in dist_params and 'upper_bounds' in dist_params

    transformed_samples = np.zeros(samples.shape)
    for i in range(samples.shape[1]):
        dist_name = dists[i]
        loc = loc_params[i]
        scale = scale_params[i]

        if dist_name == 'TN':
            mean = dist_params['means'][i]
            stdev = np.sqrt(dist_params['vars'][i])
            if has_bounds:
                lb = dist_params['lower_bounds'][i]
                ub = dist_params['upper_bounds'][i]
            else:  # Fallback: truncate at +/- 3 standard deviations
                lb = mean - 3 * stdev
                ub = mean + 3 * stdev

            # truncnorm takes standardized truncation limits.
            a, b = (lb - mean) / stdev, (ub - mean) / stdev
            frozen_dist = stats.truncnorm(a, b, loc=mean, scale=stdev)

        elif dist_name == 'U':
            frozen_dist = stats.uniform(loc=loc, scale=scale)

        elif dist_name == 'N':
            frozen_dist = stats.norm(loc=loc, scale=scale)

        else:
            # Previously an unknown code raised NameError on the first
            # column, or worse, silently reused the prior column's frozen
            # distribution. Fail loudly instead.
            raise ValueError(f"Unsupported distribution code: {dist_name!r}")

        transformed_samples[:, i] = frozen_dist.ppf(samples[:, i])
    return transformed_samples
2334# (The find_next_point function does not need to be changed from the previous version)