Coverage for jetgp/utils.py: 51%

697 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2026-04-03 15:14 -0500

1import numpy as np 

2import pyoti.core as coti 

3import pyoti.sparse as oti 

4import sympy as sp 

5from scipy.stats import qmc 

6from scipy.optimize import minimize 

7from scipy.stats import norm 

8import sys 

9from scipy.linalg import null_space 

10#from full_gddegp import gddegp 

11#from line_profiler import profile 

12from scipy.stats.qmc import Sobol 

13import scipy.stats as stats 

14from fractions import Fraction 

15from scipy.special import comb 

16from numpy.polynomial import Polynomial 

17import math 

18#import utils 

19from jetgp.full_degp import degp_utils 

20 

21 

def scale_samples(samples, lower_bounds, upper_bounds):
    """
    Scale each column of samples from the unit interval [0, 1] to user-defined bounds.

    Parameters
    ----------
    samples : ndarray of shape (n_samples, d)
        A 2D array of points in [0, 1]^d; each row is one sample, each column
        one input dimension.
    lower_bounds : array-like of length d
        Lower bound for each dimension (column).
    upper_bounds : array-like of length d
        Upper bound for each dimension (column).

    Returns
    -------
    ndarray of shape (n_samples, d)
        Samples with column j mapped affinely from [0, 1] to [lb_j, ub_j].

    Raises
    ------
    ValueError
        If the number of columns in `samples` does not match the length of the
        bound vectors.

    Example
    -------
    >>> samples = np.array([[0.5, 0.2], [0.8, 0.4]])
    >>> scale_samples(samples, [0, 1], [1, 3])
    array([[0.5, 1.4],
           [0.8, 1.8]])
    """
    samples = np.asarray(samples)
    lower_bounds = np.asarray(lower_bounds)
    upper_bounds = np.asarray(upper_bounds)

    # Explicit validation instead of `assert`: asserts are stripped under -O,
    # which would silently allow mis-shaped inputs through.
    if not (samples.shape[1] == lower_bounds.shape[0] == upper_bounds.shape[0]):
        raise ValueError("Dimension mismatch between samples and bounds")

    # Reshape bounds to (1, d) so they broadcast across the sample rows.
    lb = lower_bounds[np.newaxis, :]
    ub = upper_bounds[np.newaxis, :]

    return lb + samples * (ub - lb)

68 

69 

def flatten_der_indices(indices):
    """
    Flatten a nested list of derivative indices by one level.

    Parameters
    ----------
    indices : list of lists
        A nested list where each sublist contains derivative index
        specifications.

    Returns
    -------
    list
        A single flattened list containing all derivative index entries,
        in their original order.

    Example
    -------
    >>> flatten_der_indices([[[1, 1]], [[1, 2], [2, 1]]])
    [[1, 1], [1, 2], [2, 1]]
    """
    # One-level flatten; a comprehension replaces the manual index loops.
    return [entry for group in indices for entry in group]

96 

97 

def nrmse(y_true, y_pred, norm_type="minmax"):
    """
    Compute the Normalized Root Mean Squared Error (NRMSE).

    Parameters
    ----------
    y_true : array-like
        Ground truth or reference values.
    y_pred : array-like
        Predicted values to compare against the ground truth.
    norm_type : str, default="minmax"
        Normalization of the RMSE:
        - 'minmax': divide by the range (max - min) of `y_true`.
        - 'mean':   divide by the mean of `y_true`.
        - 'std':    divide by the standard deviation of `y_true`.

    Returns
    -------
    float
        The normalized root mean squared error. Returns ``np.inf`` when the
        chosen normalizer evaluates to zero.

    Raises
    ------
    ValueError
        If `norm_type` is not one of {'minmax', 'mean', 'std'}.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    rmse = np.sqrt(np.mean(np.square(y_true - y_pred)))

    # Dispatch table instead of an if/elif chain.
    normalizers = {
        "minmax": lambda v: np.max(v) - np.min(v),
        "mean": np.mean,
        "std": np.std,
    }
    try:
        denom = normalizers[norm_type](y_true)
    except KeyError:
        raise ValueError("norm_type must be 'minmax', 'mean', or 'std'") from None

    return rmse / denom if denom != 0 else np.inf

148 

149 

def convert_index_to_exponent_form(lst):
    """
    Convert a list of indices into exponent (run-length) form.

    Consecutive identical elements are compressed into pairs [value, count],
    where 'value' is the element and 'count' is the length of the run.

    Parameters
    ----------
    lst : list of int
        A list of integers representing variable indices.

    Returns
    -------
    list of [int, int]
        A compressed list where each entry is [value, count], representing the
        multiplicity of each consecutive run in the original list.

    Example
    -------
    >>> convert_index_to_exponent_form([1, 1, 2, 2, 2, 3])
    [[1, 2], [2, 3], [3, 1]]
    """
    # Local import so this fix does not touch the module import block.
    from itertools import groupby

    # groupby is the stdlib run-length-encoding idiom; it groups only
    # consecutive duplicates, matching the original hand-rolled loop.
    return [[value, sum(1 for _ in run)] for value, run in groupby(lst)]

192 

193 

def build_companion_array(nvars, order, der_indices):
    """
    Build a companion array mapping each derivative index to its order in the
    OTI basis.

    Entry 0 is always 0 (the function-value slot); each subsequent entry is
    the derivative order of the corresponding term, as resolved by
    `compare_OTI_indices`.

    Parameters
    ----------
    nvars : int
        Number of variables (input dimensions).
    order : int
        Maximum derivative order considered.
    der_indices : list of lists
        Derivative indices in exponent form, grouped by derivative order;
        each inner entry is one multi-index.

    Returns
    -------
    companion_array : ndarray
        1D array: [0, order_of_term_1, order_of_term_2, ...].

    Example
    -------
    >>> build_companion_array(2, 2, [[[1, 1]], [[1, 2]], [[2, 1]]])
    array([0, 1, 2, 1])
    """
    orders = [0]
    for group in der_indices:
        for term in group:
            orders.append(compare_OTI_indices(nvars, order, term))
    return np.array(orders)

235 

def build_companion_array_predict(nvars, order, der_indices):
    """
    Build a companion array for prediction, mapping each derivative index to
    its order in the OTI basis.

    Unlike `build_companion_array`, this variant expects a FLAT list of
    derivative multi-indices (no per-order grouping). Entry 0 is always 0
    (the function-value slot).

    Parameters
    ----------
    nvars : int
        Number of variables (input dimensions).
    order : int
        Maximum derivative order considered.
    der_indices : list of lists
        Flat list of derivative multi-indices in exponent form.

    Returns
    -------
    companion_array : ndarray
        1D array of length (1 + len(der_indices)): the first entry is 0, each
        subsequent entry is the derivative order of the matching term.

    Example
    -------
    >>> build_companion_array_predict(2, 2, [[[1, 1]], [[1, 2]], [[2, 1]]])
    array([0, 1, 2, 1])
    """
    companion_list = [0] + [
        compare_OTI_indices(nvars, order, term) for term in der_indices
    ]
    return np.array(companion_list)

276 

277 

def compare_OTI_indices(nvars, order, term_check):
    """
    Find the derivative order of a multi-index within the OTI basis.

    Scans all OTI basis directions of order 1..`order` and returns the order
    of the first direction whose exponent form equals `term_check`.

    Parameters
    ----------
    nvars : int
        Number of variables (input dimensions).
    order : int
        Maximum derivative order considered.
    term_check : list of [int, int]
        The multi-index to check, in exponent form [[var_index, exponent], ...].

    Returns
    -------
    int
        The order of the matching term (1..order), or -1 if no basis term
        matches.

    Example
    -------
    >>> compare_OTI_indices(2, 2, [[1, 2]])  # d^2/dx1^2
    2
    """
    helper = coti.get_dHelp()

    for current_order in range(1, order + 1):
        ndirections = coti.ndir_order(nvars, current_order)
        for direction in range(ndirections):
            candidate = convert_index_to_exponent_form(
                helper.get_fulldir(direction, current_order))
            if np.array_equal(candidate, term_check):
                return current_order
    return -1

317 

318 

def gen_OTI_indices(nvars, order):
    """
    Generate the OTI basis indices in exponent form, grouped by order.

    Parameters
    ----------
    nvars : int
        Number of variables (input dimensions).
    order : int
        Maximum derivative order considered.

    Returns
    -------
    list of lists
        Outer list of length `order`; entry k-1 holds the exponent-form
        multi-indices of all order-k basis directions.

    Example
    -------
    >>> gen_OTI_indices(2, 2)
    [
        [[[1, 1]], [[2, 1]]],                     # first-order terms
        [[[1, 2]], [[1, 1], [2, 1]], [[2, 2]]]    # second-order terms
    ]
    """
    helper = coti.get_dHelp()

    indices_by_order = []
    for current_order in range(1, order + 1):
        ndirections = coti.ndir_order(nvars, current_order)
        indices_by_order.append([
            convert_index_to_exponent_form(helper.get_fulldir(d, current_order))
            for d in range(ndirections)
        ])
    return indices_by_order

362 

363 

def reshape_y_train(y_train):
    """
    Flatten and concatenate function values and derivative observations into a
    single 1D array.

    Parameters
    ----------
    y_train : list of arrays
        A list where:
        - y_train[0] contains the function values.
        - y_train[1:], if present, contain derivative values for each
          derivative component.

    Returns
    -------
    ndarray of shape (n_total,)
        A flat 1D array: function values first, then each derivative block in
        order.

    Example
    -------
    >>> reshape_y_train([np.array([1.0, 2.0]), np.array([0.5, 1.0])])
    array([1. , 2. , 0.5, 1. ])
    """
    # Single concatenate instead of repeated vstack-in-a-loop (which copies
    # the accumulated array on every iteration).
    return np.concatenate([np.asarray(block).reshape(-1) for block in y_train])

391 

392 

def transform_cov(cov, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale the diagonal of a covariance matrix back to the original
    (unnormalized) scale for function values and multi-index derivatives.

    Parameters
    ----------
    cov : ndarray of shape (n_total, n_total)
        Covariance matrix from the GP model (function values + derivatives).
    sigma_y : float
        Standard deviation used to normalize the function values.
    sigmas_x : ndarray of shape (1, nvars)
        Standard deviations used to normalize each input dimension.
    der_indices : list of lists
        Derivative multi-indices in exponent form [[var_index, exponent], ...].
    X_test : ndarray of shape (n_samples, nvars)
        Test inputs; its row count fixes the block size of `cov`.

    Returns
    -------
    ndarray of shape (n_total,)
        Rescaled variances (absolute diagonal of `cov` times the appropriate
        scale factor per block).
    """
    n = X_test.shape[0]
    variances = abs(np.diag(cov))
    rescaled = np.zeros((variances.shape[0],))

    # Function-value block: variance scales with sigma_y^2.
    rescaled[:n] = variances[:n] * sigma_y ** 2

    # Derivative blocks: chain rule divides by the squared input scales.
    for block, term in enumerate(der_indices, start=1):
        input_scale = 1
        for var_idx, exponent in term:
            input_scale = input_scale * sigmas_x[0][var_idx - 1] ** exponent
        scale = sigma_y ** 2 / input_scale ** 2
        lo, hi = block * n, (block + 1) * n
        rescaled[lo:hi] = variances[lo:hi] * scale
    return rescaled

435 

436 

def transform_cov_directional(cov, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale the diagonal of a covariance matrix for function values and
    directional derivatives.

    Unlike `transform_cov`, derivatives here are directional, so every block
    (function values and derivatives alike) is scaled by sigma_y**2 only; no
    input scaling is applied.

    Parameters
    ----------
    cov : ndarray of shape (n_total, n_total)
        Covariance matrix from the GP model.
    sigma_y : float
        Standard deviation used to normalize the function values.
    sigmas_x : ndarray of shape (1, nvars)
        Input standard deviations (unused here; kept for interface symmetry
        with `transform_cov`).
    der_indices : list
        Directional-derivative entries; only its length (number of blocks)
        is used.
    X_test : ndarray of shape (n_samples, nvars)
        Test inputs; its row count fixes the block size of `cov`.

    Returns
    -------
    ndarray of shape (n_total,)
        Rescaled variances.
    """
    n = X_test.shape[0]
    variances = abs(np.diag(cov))
    rescaled = np.zeros((variances.shape[0],))

    scale = sigma_y ** 2  # same factor for every block (loop invariant)
    rescaled[:n] = variances[:n] * scale
    for block in range(1, len(der_indices) + 1):
        lo, hi = block * n, (block + 1) * n
        rescaled[lo:hi] = variances[lo:hi] * scale
    return rescaled

475 

476 

def transform_predictions(y_pred, mu_y, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale predicted function values and multi-index derivatives from
    normalized space back to the original scale.

    Parameters
    ----------
    y_pred : ndarray of shape (n_total,)
        Predicted means in normalized space (function values first, then one
        block per derivative component).
    mu_y : float or (1, 1) array
        Mean of the original function values.
    sigma_y : array
        Standard deviation of the original function values. NOTE(review):
        the code indexes `factor[0, 0]`, so this is expected to be a
        (1, 1)-shaped array rather than a plain float — confirm with callers.
    sigmas_x : ndarray of shape (1, nvars)
        Standard deviations of the input variables.
    der_indices : list of lists
        Derivative multi-indices in exponent form [[var_index, exponent], ...].
    X_test : ndarray of shape (n_samples, nvars)
        Test inputs; its row count fixes the block size of `y_pred`.

    Returns
    -------
    ndarray of shape (n_total, 1)
        Rescaled function values and derivatives, stacked as a column vector.
    """
    n = X_test.shape[0]
    rescaled = (mu_y + y_pred[:n] * sigma_y).reshape(-1, 1)

    for block, term in enumerate(der_indices, start=1):
        # Chain rule: divide the output scale by each input scale^exponent.
        scale = sigma_y
        for var_idx, exponent in term:
            scale = scale / sigmas_x[0][var_idx - 1] ** exponent
        segment = y_pred[block * n:(block + 1) * n] * scale[0, 0]
        rescaled = np.vstack((rescaled, segment.reshape(-1, 1)))
    return rescaled

521 

522 

def transform_predictions_directional(y_pred, mu_y, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale predicted function values and directional derivatives from
    normalized space back to the original scale.

    Derivatives here are directional, so only the output scale (sigma_y) is
    applied to the derivative blocks; no input scaling is used.

    Parameters
    ----------
    y_pred : ndarray of shape (n_total,)
        Predicted means in normalized space (function values first, then one
        block per direction).
    mu_y : float or (1, 1) array
        Mean of the original function values.
    sigma_y : array
        Standard deviation of the original function values. NOTE(review):
        the code indexes `sigma_y[0, 0]`, so this is expected to be a
        (1, 1)-shaped array rather than a plain float — confirm with callers.
    sigmas_x : ndarray of shape (1, nvars)
        Input standard deviations (unused; kept for interface compatibility).
    der_indices : list
        Directional-derivative entries; only its length (number of blocks)
        is used.
    X_test : ndarray of shape (n_samples, nvars)
        Test inputs; its row count fixes the block size of `y_pred`.

    Returns
    -------
    ndarray of shape (n_total, 1)
        Rescaled function values and directional derivatives as a column
        vector.
    """
    n = X_test.shape[0]
    rescaled = (mu_y + y_pred[:n] * sigma_y).reshape(-1, 1)

    for block in range(1, len(der_indices) + 1):
        segment = y_pred[block * n:(block + 1) * n] * sigma_y[0, 0]
        rescaled = np.vstack((rescaled, segment.reshape(-1, 1)))
    return rescaled

565 

566 

def normalize_x_data_test(X_test, sigmas_x, mus_x):
    """
    Normalize test inputs with the training-set mean and standard deviation.

    Parameters
    ----------
    X_test : ndarray of shape (n_samples, nvars)
        Test input points to be normalized.
    sigmas_x : ndarray of shape (1, nvars)
        Per-variable standard deviations from the training inputs (scaling).
    mus_x : ndarray of shape (1, nvars)
        Per-variable means from the training inputs (centering).

    Returns
    -------
    ndarray of shape (n_samples, nvars)
        Normalized test inputs: (X_test - mus_x) / sigmas_x.

    Example
    -------
    >>> normalize_x_data_test(np.array([[2.0, 3.0]]),
    ...                       np.array([[1.0, 2.0]]),
    ...                       np.array([[0.0, 1.0]]))
    array([[2., 1.]])
    """
    return (X_test - mus_x) / sigmas_x

597 

598 

def normalize_x_data_train(X_train):
    """
    Normalize training inputs by centering and scaling each variable.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, nvars)
        Training input points.

    Returns
    -------
    X_train_normalized : ndarray of shape (n_samples, nvars)
        Normalized training inputs: (X_train - mean) / std, per column.

    Example
    -------
    >>> normalize_x_data_train(np.array([[1.0, 2.0], [3.0, 4.0]]))
    array([[-1., -1.],
           [ 1.,  1.]])

    NOTE(review): the previous docstring promised a 3-tuple
    (normalized, means, stds) but the function has always returned only the
    normalized array; the docstring now matches the code. If callers need the
    statistics, they can recompute them or a tuple-returning variant should
    be added separately.
    """
    mean_vec_x = np.mean(X_train, axis=0).reshape(1, -1)  # shape: (1, nvars)
    std_vec_x = np.std(X_train, axis=0).reshape(1, -1)    # shape: (1, nvars)

    return (X_train - mean_vec_x) / std_vec_x

629 

630 

def normalize_directions(sigmas_x, rays):
    """
    Normalize direction vectors (rays) into the normalized input space.

    Each component j of every direction vector is divided by the input
    standard deviation sigmas_x[0][j], so directional derivatives taken along
    the transformed rays are consistent with normalized inputs.

    Parameters
    ----------
    sigmas_x : ndarray of shape (1, nvars)
        Standard deviations of the input variables.
    rays : ndarray of shape (nvars, n_directions)
        Direction vectors (columns) in the original input space.

    Returns
    -------
    ndarray of shape (nvars, n_directions)
        Normalized direction vectors.

    Example
    -------
    >>> normalize_directions(np.array([[2.0, 1.0]]),
    ...                      np.array([[1.0, 0.0], [0.0, 1.0]]))
    array([[0.5, 0. ],
           [0. , 1. ]])
    """
    # Single broadcast divide replaces the per-column diag-matrix products:
    # dividing row j by sigmas_x[...,j] is exactly diag(1/sigmas) @ column.
    return np.asarray(rays, dtype=float) / sigmas_x.reshape(-1, 1)

662 

663 

664# def normalize_directions_2(sigmas_x, rays): 

665# """ 

666# Normalize direction vectors (rays) based on input scaling. 

667 

668# This function rescales direction vectors used for **directional derivatives** so that 

669# they are consistent with normalized input space. 

670 

671# Parameters: 

672# ---------- 

673# sigmas_x : ndarray of shape (1, nvars) 

674# Standard deviations of the input variables (used for scaling each direction). 

675# rays : ndarray of shape (nvars, n_directions) 

676# Direction vectors (columns) in the original input space. 

677 

678# Returns: 

679# ------- 

680# transformed_rays : ndarray of shape (nvars, n_directions) 

681# Normalized direction vectors. 

682 

683# Example: 

684# -------- 

685# >>> sigmas_x = np.array([[2.0, 1.0]]) 

686# >>> rays = np.array([[1.0, 0.0], [0.0, 1.0]]) 

687# >>> normalize_directions(sigmas_x, rays) 

688# array([[0.5, 0. ], 

689# [0. , 1. ]]) 

690# """ 

691# transformed_rays_list = [] 

692# for i in range(len(rays)): 

693 

694# transformed_rays = np.zeros(rays[i].shape) 

695# for j in range(transformed_rays.shape[1]): 

696# transformed_rays[:, j] = np.diag( 

697# 1/sigmas_x.flatten()) @ rays[i][:, j] 

698# transformed_rays_list.append(transformed_rays) 

699# return transformed_rays_list 

700 

701 

def normalize_directions_2(sigmas_x, rays_array):
    """
    Normalize a list of direction-vector arrays into normalized input space.

    Like `normalize_directions`, but accepts a list of ray arrays (e.g. one
    per training point) and normalizes each.

    Parameters
    ----------
    sigmas_x : ndarray of shape (1, nvars)
        Standard deviations of the input variables.
    rays_array : list of ndarray, each of shape (nvars, n_directions)
        Direction vectors (columns) in the original input space.

    Returns
    -------
    list of ndarray
        List of normalized direction arrays, same shapes as the inputs.

    Example
    -------
    >>> normalize_directions_2(np.array([[2.0, 1.0]]),
    ...                        [np.array([[1.0, 0.0], [0.0, 1.0]])])
    [array([[0.5, 0. ],
            [0. , 1. ]])]
    """
    scale = sigmas_x.reshape(-1, 1)  # (nvars, 1): broadcasts over columns
    return [rays / scale for rays in rays_array]

731 

732 

def normalize_y_data(X_train, y_train, sigma_data, der_indices):
    """
    Normalize function values, derivatives, and observational noise for
    training data.

    - Function values (`y_train[0]`) are normalized to zero mean, unit
      variance.
    - Derivative blocks (`y_train[1:]`) are scaled by the chain rule:
      (std_x[j] ** exponent_j) / std_y for each variable in the multi-index.
    - The matching blocks of `sigma_data` are scaled by the same factor.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, nvars)
        Training inputs (only used to compute normalization statistics).
    y_train : list of arrays
        y_train[0]: function values; y_train[1:]: one column array of
        derivative values per entry of `der_indices`.
    sigma_data : ndarray of shape (n_total,) or None
        Observational noise standard deviations, laid out in the same block
        order as the stacked targets. NOTE(review): despite the old docstring
        saying "float", the code slices it per block, so an array is expected.
    der_indices : list of lists
        Derivative multi-indices in exponent form [[var_index, exponent], ...].

    Returns
    -------
    y_train_normalized : ndarray of shape (n_total,)
        Normalized, flattened function values and derivatives.
    mean_vec_y, std_vec_y : ndarray
        Mean / standard deviation of the function values.
    std_vec_x, mean_vec_x : ndarray of shape (1, nvars)
        Standard deviations / means of the input variables.
    noise_std_normalized : ndarray or None
        Normalized noise standard deviations (a new array; the caller's
        `sigma_data` is left untouched), or None if `sigma_data` is None.
    """
    num_points = len(X_train)
    mean_vec_x = np.mean(X_train, axis=0).reshape(1, -1)  # (1, nvars)
    std_vec_x = np.std(X_train, axis=0).reshape(1, -1)    # (1, nvars)

    mean_vec_y = np.mean(y_train[0], axis=0).reshape(-1, 1)
    std_vec_y = np.std(y_train[0], axis=0).reshape(-1, 1)

    y_train_normalized = (y_train[0] - mean_vec_y) / std_vec_y

    # Bug fix: the old code aliased sigma_data and then assigned into it,
    # mutating the caller's array in place. Work on a copy instead.
    noise_std_normalized = (
        None if sigma_data is None else np.array(sigma_data, copy=True))

    for i in range(len(der_indices)):
        # Chain-rule factor for this derivative block.
        factor = 1 / std_vec_y
        for var_idx, exponent in der_indices[i]:
            factor = factor * std_vec_x[0][var_idx - 1] ** exponent
        y_train_normalized = np.vstack(
            (y_train_normalized.reshape(-1, 1), y_train[i + 1] * factor[0, 0]))
        if noise_std_normalized is not None:
            lo, hi = (i + 1) * num_points, (i + 2) * num_points
            noise_std_normalized[lo:hi] = noise_std_normalized[lo:hi] * factor[0, 0]

    return (y_train_normalized.flatten(), mean_vec_y, std_vec_y,
            std_vec_x, mean_vec_x, noise_std_normalized)

804 

805 

def normalize_y_data_directional(X_train, y_train, sigma_data, der_indices):
    """
    Normalize function values and directional derivatives for training data.

    - Function values (`y_train[0]`) are normalized to zero mean, unit
      variance.
    - Directional derivative blocks (`y_train[1:]`) are scaled by
      1 / std_vec_y only (directions carry the input scaling elsewhere).
    - `sigma_data`, if given, is scaled by 1 / std_vec_y.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, nvars)
        Training inputs (only used to compute normalization statistics).
    y_train : list of arrays
        y_train[0]: function values; y_train[1:]: one column array of
        directional derivative values per entry of `der_indices`.
    sigma_data : float or ndarray or None
        Observational noise standard deviation(s).
    der_indices : list
        Directional-derivative entries; only its length (number of blocks)
        is used.

    Returns
    -------
    y_train_normalized : ndarray of shape (n_total,)
        Normalized, flattened function values and directional derivatives.
    mean_vec_y, std_vec_y : ndarray
        Mean / standard deviation of the function values.
    std_vec_x, mean_vec_x : ndarray of shape (1, nvars)
        Standard deviations / means of the input variables.
    noise_std_normalized : same type as sigma_data, or None
        sigma_data / std_vec_y[0, 0], or None if `sigma_data` is None.
    """
    mean_vec_x = np.mean(X_train, axis=0).reshape(1, -1)  # (1, nvars)
    std_vec_x = np.std(X_train, axis=0).reshape(1, -1)    # (1, nvars)

    mean_vec_y = np.mean(y_train[0], axis=0).reshape(-1, 1)
    std_vec_y = np.std(y_train[0], axis=0).reshape(-1, 1)

    y_train_normalized = (y_train[0] - mean_vec_y) / std_vec_y

    scale = 1 / std_vec_y  # same factor for every directional block
    for i in range(len(der_indices)):
        y_train_normalized = np.vstack(
            (y_train_normalized.reshape(-1, 1), y_train[i + 1] * scale[0, 0]))

    # Bug fix: the noise normalization previously sat inside the loop body,
    # so with an empty `der_indices` the variable was never bound and the
    # return raised NameError. Computing it once here is equivalent for
    # non-empty der_indices and correct for the empty case.
    if sigma_data is not None:
        noise_std_normalized = sigma_data / std_vec_y[0, 0]
    else:
        noise_std_normalized = None

    return (y_train_normalized.flatten(), mean_vec_y, std_vec_y,
            std_vec_x, mean_vec_x, noise_std_normalized)

861 

def generate_submodel_noise_matricies(sigma_data, index, der_indices, num_points, base_der_indices):
    """
    Generate diagonal noise covariance matrices for each submodel component
    (function values plus their associated derivative blocks).

    For every submodel i, the diagonal starts with the first `num_points`
    diagonal entries of `sigma_data` (the function-value noise; note this is
    taken wholesale, not restricted by `index[i]`), followed by the diagonal
    entries selected by `index[i][j]` within each derivative block present in
    `der_indices[i]`.

    Parameters
    ----------
    sigma_data : ndarray of shape (n_total, n_total)
        Full (typically diagonal) noise matrix for all training data, laid
        out in blocks of `num_points`: function values first, then one block
        per entry of `base_der_indices`.
    index : list
        One entry per submodel; entry i is indexed as index[i][j], giving the
        point indices to keep for derivative j of that submodel.
    der_indices : list of lists
        Per-submodel derivative indices; each must appear in
        `base_der_indices`.
    num_points : int
        Number of training points per block (offset unit into `sigma_data`).
    base_der_indices : list of lists
        Master ordering of derivative blocks in `sigma_data`.

    Returns
    -------
    list of ndarray
        One diagonal noise matrix per submodel.

    Raises
    ------
    Exception
        If a derivative index in `der_indices` is not found in
        `base_der_indices`. (Bug fix: the old for/else used `continue`, which
        skipped the raise and silently dropped unknown indices, leaving the
        documented error path unreachable.)
    """
    sub_model_matricies = []
    for i, idx in enumerate(index):
        # Function-value noise: full first block's diagonal.
        values = np.diag(sigma_data[:num_points, :num_points])
        for j in range(len(der_indices[i])):
            try:
                # +1 because block 0 holds the function values.
                block = base_der_indices.index(der_indices[i][j]) + 1
            except ValueError:
                raise Exception(
                    'derivative index not found in base_der_indices')
            indices = block * num_points + np.array(idx[j])
            if len(indices) > 0:
                # Fancy indexing [indices, indices] picks diagonal entries.
                values = np.concatenate(
                    (values, sigma_data[indices, indices].flatten()))
        sub_model_matricies.append(np.diag(values))

    return sub_model_matricies

def generate_submodel_noise_matricies_old(sigma_data, index, der_indices, num_points, base_der_indices):
    """
    Legacy builder of per-submodel diagonal noise matrices.

    For each submodel, the diagonal of the leading function-value block of
    `sigma_data` is combined with the diagonal noise of every derivative block
    listed in `der_indices`, where the block for base_der_indices[k] starts at
    offset (k + 1) * num_points. Derivative directions that do not appear in
    `base_der_indices` are skipped.

    Parameters
    ----------
    sigma_data : ndarray of shape (n_total, n_total)
        Full (typically diagonal) noise covariance for all training data.
    index : list of lists of int
        Per-submodel point indices applied to every derivative block.
    der_indices : list of lists
        Per-submodel derivative directions, matched against base_der_indices.
    num_points : int
        Training points per function; sets the block offsets.
    base_der_indices : list of lists
        Master ordering of derivative blocks in the covariance matrix.

    Returns
    -------
    list of ndarray
        One diagonal noise matrix per submodel.
    """

    sub_model_matricies = []
    for model_pos, sub_idx in enumerate(index):
        # Start from the function-value noise diagonal.
        diag_vals = np.diag(sigma_data[:num_points, :num_points])
        for der in der_indices[model_pos]:
            match_pos = next(
                (k for k, cand in enumerate(base_der_indices) if cand == der),
                None)
            if match_pos is None:
                # Unknown derivative directions are silently skipped.
                continue
            offset_idx = ((match_pos + 1) * num_points) + np.array(sub_idx)
            diag_vals = np.concatenate(
                (diag_vals, sigma_data[offset_idx, offset_idx].flatten()))
        sub_model_matricies.append(np.diag(diag_vals))

    return sub_model_matricies

995 

996 

def matern_kernel_builder(nu, oti_module=None):
    """
    Build a callable Matérn kernel k(r) for a given smoothness ν.

    The kernel is assembled symbolically with SymPy and then lambdified, so
    half-integer ν values collapse `besselk` into closed-form exponential
    expressions.

    Parameters
    ----------
    nu : float
        Smoothness parameter of the Matérn kernel. Should be a half-integer
        (e.g., 0.5, 1.5, 2.5, ...).
    oti_module : module, optional
        The PyOTI static module to use for exp. If None, uses numpy.

    Returns
    -------
    callable
        A lambdified function evaluating the Matérn kernel at distance r.
    """
    r = sp.symbols('r')
    nu_rational = sp.Rational(2 * nu, 2)
    prefactor = (2 ** (1 - nu_rational)) / sp.gamma(nu_rational)
    scaled_r = sp.sqrt(2 * nu_rational) * r
    kernel_expr = sp.simplify(
        prefactor * scaled_r ** nu_rational
        * sp.simplify(sp.besselk(nu_rational, scaled_r))
    )

    if oti_module is None:
        backend = ["numpy"]
    else:
        # Route exp() through PyOTI so OTI numbers propagate derivatives.
        backend = [{"exp": oti_module.exp}, "numpy"]
    return sp.lambdify(r, kernel_expr, modules=backend)

1027 

1028 

def matern_kernel_grad_builder(nu, oti_module=None):
    """
    Build the derivative dk/dr of the Matérn kernel with given smoothness ν.

    Parameters
    ----------
    nu : float
        Smoothness parameter of the Matérn kernel (half-integer, e.g. 0.5,
        1.5, 2.5).
    oti_module : module, optional
        The PyOTI static module to use for exp. If None, uses numpy.

    Returns
    -------
    callable
        A lambdified function evaluating dk/dr at the scaled distance r.
    """
    r = sp.symbols('r')
    nu_rational = sp.Rational(2 * nu, 2)
    prefactor = (2 ** (1 - nu_rational)) / sp.gamma(nu_rational)
    scaled_r = sp.sqrt(2 * nu_rational) * r
    kernel_expr = (prefactor * scaled_r ** nu_rational
                   * sp.simplify(sp.besselk(nu_rational, scaled_r)))
    derivative_expr = sp.simplify(sp.diff(sp.simplify(kernel_expr), r))

    if oti_module is None:
        return sp.lambdify(r, derivative_expr, modules=["numpy"])
    # Route exp() through PyOTI so OTI numbers propagate derivatives.
    return sp.lambdify(r, derivative_expr,
                       modules=[{"exp": oti_module.exp}, "numpy"])

1058 

1059 

1060 

def generate_bernoulli_numbers(n_max: int) -> list[Fraction]:
    """
    Generate the Bernoulli numbers B_0 through B_{n_max} exactly.

    Uses the recurrence sum_{k=0}^{n} C(n+1, k) B_k = 0, solved for B_n.
    Odd orders above 1 are zero and emitted directly without evaluating the
    recurrence (equivalent result, fewer terms to sum).

    Args:
        n_max: The maximum order of the Bernoulli number to generate.

    Returns:
        A list of Bernoulli numbers as Fraction objects (empty for n_max < 0).
    """
    if n_max < 0:
        return []

    bernoulli = [Fraction(1)]  # B_0 = 1
    for n in range(1, n_max + 1):
        if n > 1 and n % 2 == 1:
            # B_n = 0 for every odd n > 1.
            bernoulli.append(Fraction(0))
            continue
        # B_n = -(1 / (n+1)) * sum_{k=0}^{n-1} C(n+1, k) B_k; C(n+1, n) = n+1.
        acc = sum((comb(n + 1, k, exact=True) * bernoulli[k] for k in range(n)),
                  Fraction(0))
        bernoulli.append(-acc / (n + 1))

    return bernoulli

1094 

def generate_bernoulli_polynomial(alpha: int) -> Polynomial:
    """
    Generate the (2*alpha)-th Bernoulli polynomial, B_{2*alpha}(x).

    The polynomial is B_n(x) = sum_{k=0}^{n} C(n, k) B_k x^(n-k) with
    n = 2*alpha, so the coefficient of x^j is C(n, n-j) * B_{n-j}.

    Args:
        alpha: A non-negative integer.

    Returns:
        A numpy.polynomial.Polynomial object representing the polynomial.

    Raises:
        ValueError: If alpha is not a non-negative integer.
    """
    if not isinstance(alpha, int) or alpha < 0:
        raise ValueError("alpha must be a non-negative integer.")

    degree = 2 * alpha

    # Exact Bernoulli numbers up to order 2*alpha.
    bernoulli_nums = generate_bernoulli_numbers(degree)

    # numpy.polynomial orders coefficients from x^0 upward.
    coeffs = np.zeros(degree + 1, dtype=object)
    for power in range(degree + 1):
        order = degree - power
        # Convert the exact Fraction to float for the Polynomial class.
        coeffs[power] = float(comb(degree, order, exact=True)
                              * bernoulli_nums[order])

    return Polynomial(coeffs)

1129 

def generate_bernoulli_lambda(alpha: int) -> callable:
    """
    Build a callable for the scaled (2*alpha)-th Bernoulli polynomial.

    Returns ((-1)^(alpha+1) * (2*pi)^(2*alpha) / (2*alpha)!) * B_{2*alpha}(x)
    as a numpy Polynomial, which is itself callable.

    Args:
        alpha: A non-negative integer.

    Returns:
        A callable evaluating the scaled B_{2*alpha}(x).
    """
    # Scalar * Polynomial yields another (callable) Polynomial.
    scale = ((-1) ** (alpha + 1) * (2 * np.pi) ** (2 * alpha)) \
        / math.factorial(2 * alpha)
    return scale * generate_bernoulli_polynomial(alpha)

1142 

1143 

def robust_local_optimization(func, x0, args=(), lb=None, ub=None, debug=False):
    """Run a bounded L-BFGS-B minimization, recovering from the
    'ABNORMAL_TERMINATION_IN_LNSRCH' failure mode.

    Parameters
    ----------
    func : callable
        Objective function to minimize.
    x0 : array-like
        Initial guess.
    args : tuple, optional
        Extra positional arguments forwarded to `func`.
    lb, ub : array-like or None, optional
        Lower/upper bounds; bounds are applied only when both are given.
    debug : bool, optional
        Print diagnostics when recovering from abnormal termination.

    Returns
    -------
    result : scipy OptimizeResult or RecoveredResult
        Always carries a boolean `recovered_from_abnormal` attribute so
        downstream checks (e.g. `should_accept_local_result`) can read it
        without risking AttributeError.
    """
    bounds = list(zip(lb, ub)) if lb is not None and ub is not None else None

    result = minimize(
        func, x0, args=args, method="L-BFGS-B", bounds=bounds,
        options={"disp": False, "maxiter": 1000, "gtol": 1e-7}
    )
    sys.stdout.flush()

    # Bug fix: tag every result with the flag, not only the abnormal path,
    # so a failed-but-not-abnormal result does not break attribute access.
    result.recovered_from_abnormal = False

    # `message` was bytes in older SciPy releases; normalize before matching.
    if "ABNORMAL_TERMINATION_IN_LNSRCH" not in str(result.message):
        return result

    if debug:
        print(f"L-BFGS-B terminated abnormally: {result.message}")
        print("Using current point as recovered solution...")

    # Lightweight stand-in for OptimizeResult after abnormal termination;
    # keeps the last iterate and re-evaluates the objective there.
    class RecoveredResult:
        def __init__(self, x, fun, message):
            self.x = x
            self.fun = fun
            self.success = False
            self.message = message
            self.recovered_from_abnormal = True

    current_f = func(result.x, *args)
    return RecoveredResult(
        result.x, current_f,
        f"Recovered from abnormal termination: {result.message}")

1173 

1174 

def should_accept_local_result(local_res, current_best_f, is_feasible, debug=False):
    """Decide whether a local-optimization result should replace the current best.

    Parameters
    ----------
    local_res : object
        Optimization result with `.success`, `.x`, `.fun` attributes and
        optionally `.recovered_from_abnormal`.
    current_best_f : float
        Incumbent best objective value; the result must strictly improve on it.
    is_feasible : callable
        Predicate returning True when a point satisfies the constraints.
    debug : bool, optional
        Print reasons for rejection.

    Returns
    -------
    bool
        True only for a successful (or recovered), feasible, strictly
        improving result.
    """
    # Bug fix: plain OptimizeResult objects (e.g. from a custom local
    # optimizer) may not carry the flag — default to False instead of raising.
    recovered = getattr(local_res, "recovered_from_abnormal", False)
    if not local_res.success and not recovered:
        return False

    if not is_feasible(local_res.x):
        if debug:
            print("Local optimization result is infeasible - rejecting")
        return False

    if local_res.fun >= current_best_f:
        if debug:
            print(
                "Local optimization did not improve:")
        return False

    return True

1192 

1193 

1194 

def jade(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={},
         pop_size=100, n_generations=100, p=0.1, c=0.1,
         minstep=1e-6, stagnation_limit=15, debug=False,
         local_opt_every=15, initial_positions=None, seed=42,
         local_optimizer=None, func_and_grad=None, grad_func=None):
    """
    JADE (Adaptive Differential Evolution) with optional local refinement
    and stagnation-based stopping criterion.

    JADE: Adaptive Differential Evolution With Optional External Archive
    https://ieeexplore.ieee.org/abstract/document/5208221

    Parameters
    ----------
    func : callable
        Objective function to minimize.
    lb, ub : array-like
        Lower and upper bounds.
    ieqcons : list
        List of inequality constraint functions (each must be >= 0 when
        feasible). NOTE(review): mutable default argument, but it is never
        mutated here, so it is safe in practice.
    f_ieqcons : callable or None
        Single function returning array of constraint values.
    args : tuple
        Extra arguments passed to func.
    kwargs : dict
        Extra keyword arguments passed to func.
    pop_size : int
        Population size.
    n_generations : int
        Maximum number of generations.
    p : float
        Fraction of top individuals for p-best selection.
    c : float
        Learning rate for parameter adaptation.
    minstep : float
        Minimum position change to accept a new best (when improving).
    stagnation_limit : int
        Stop if no improvement for this many consecutive generations.
    debug : bool
        Print debug information.
    local_opt_every : int or None
        Run local optimization every this many generations. None to disable.
    initial_positions : array-like or None
        Initial positions to seed the population.
    seed : int
        Random seed.
    local_optimizer : callable or None
        Custom local optimizer called as local_optimizer(func, x0, lb, ub).
    func_and_grad : callable or None
        Function returning (f, grad) used with jac=True in L-BFGS-B.
    grad_func : callable or None
        Gradient-only function; combined with func when func_and_grad is None.

    Returns
    -------
    g_best : ndarray
        Best position found.
    f_best : float
        Best objective value found.
    """

    lb = np.asarray(lb)
    ub = np.asarray(ub)
    D = len(lb)
    # NOTE(review): seeds the global NumPy RNG (legacy API) — affects other
    # users of np.random in the process.
    np.random.seed(seed)

    # Constraint setup: a single vector-valued constraint, a list of scalar
    # constraints, or an always-feasible dummy.
    if f_ieqcons is not None:
        def cons(x): return np.asarray(f_ieqcons(x, *args, **kwargs))
    elif ieqcons:
        # The comprehension variable `c` is scoped to the comprehension and
        # does not clobber the adaptation rate parameter `c`.
        def cons(x): return np.asarray([c(x, *args, **kwargs) for c in ieqcons])
    else:
        def cons(x): return np.array([0.0])

    def is_feasible(x):
        # Feasible means every constraint value is non-negative.
        return np.all(cons(x) >= 0)

    # Initialize population using Sobol (low-discrepancy space filling).
    sampler = qmc.Sobol(d=D, scramble=True, seed=seed)
    sobol_sample = sampler.random_base2(m=int(np.ceil(np.log2(pop_size))))
    pop = qmc.scale(sobol_sample[:pop_size], lb, ub)
    if initial_positions is not None:
        # Seed the front of the population with user-provided points.
        n_init = min(len(initial_positions), pop_size)
        pop[:n_init] = initial_positions[:n_init]

    # Fitness and feasibility
    fitness = np.array([func(ind, *args, **kwargs) for ind in pop])
    feasible = np.array([is_feasible(ind) for ind in pop])

    # Initialize global best: infeasible individuals are penalized with 1e10
    # so a feasible candidate wins whenever one exists.
    g_idx = np.argmin(fitness * feasible + (~feasible) * 1e10)
    g_best = pop[g_idx].copy()
    f_best = fitness[g_idx]

    # Adaptive parameter means (JADE's mu_F / mu_CR).
    mu_F = 0.5
    mu_CR = 0.5
    archive = []

    # Store previous best for minstep comparison
    prev_g_best = g_best.copy()

    # Stagnation counter
    stagnation_count = 0

    for gen in range(1, n_generations + 1):
        f_prev_gen = f_best

        new_pop = np.zeros_like(pop)
        new_fitness = np.zeros(pop_size)
        new_F_list = []
        new_CR_list = []

        for i in range(pop_size):
            # Adaptive F and CR: Cauchy-perturbed F, Gaussian-perturbed CR,
            # both clipped to [0, 1] per the JADE scheme.
            F = np.random.standard_cauchy() * 0.1 + mu_F
            F = np.clip(F, 0, 1)
            CR = np.random.normal(mu_CR, 0.1)
            CR = np.clip(CR, 0, 1)

            # p-best selection: random pick among the top ceil(p * pop_size).
            p_num = max(2, int(np.ceil(p * pop_size)))
            pbest_idx = np.random.choice(np.argsort(fitness)[:p_num])
            x_pbest = pop[pbest_idx]

            # Mutation: current-to-pbest
            idxs = [idx for idx in range(pop_size) if idx != i]
            r1, r2 = np.random.choice(idxs, 2, replace=False)
            x_mut = pop[i] + F * (x_pbest - pop[i]) + F * (pop[r1] - pop[r2])
            x_mut = np.clip(x_mut, lb, ub)

            # Crossover: binomial, with jrand forcing at least one mutant gene.
            jrand = np.random.randint(D)
            trial = np.array([
                x_mut[j] if np.random.rand() < CR or j == jrand
                else pop[i][j] for j in range(D)
            ])
            trial = np.clip(trial, lb, ub)

            f_trial = func(trial, *args, **kwargs)
            if is_feasible(trial) and f_trial < fitness[i]:
                # Successful trial: keep it, archive the replaced parent, and
                # record F/CR for parameter adaptation.
                new_pop[i] = trial
                new_fitness[i] = f_trial
                archive.append(pop[i].copy())
                new_F_list.append(F)
                new_CR_list.append(CR)
            else:
                new_pop[i] = pop[i]
                new_fitness[i] = fitness[i]

        # Update population
        pop = new_pop
        fitness = new_fitness

        # Update global best
        feasible_idx = [idx for idx in range(pop_size) if is_feasible(pop[idx])]
        if feasible_idx:
            idx_best = feasible_idx[np.argmin(fitness[feasible_idx])]
            if fitness[idx_best] < f_best:
                # Check minstep only when g_best improves
                step_size = np.linalg.norm(pop[idx_best] - prev_g_best)
                if step_size <= minstep:
                    if debug:
                        print(f"Stopping: Position change < {minstep} at generation {gen}")
                    g_best = pop[idx_best].copy()
                    f_best = fitness[idx_best]
                    break

                g_best = pop[idx_best].copy()
                f_best = fitness[idx_best]
                prev_g_best = g_best.copy()

        # Adapt F and CR toward the means of the successful parameters.
        if new_F_list:
            mu_F = (1 - c) * mu_F + c * np.mean(new_F_list)
            mu_CR = (1 - c) * mu_CR + c * np.mean(new_CR_list)

        # Periodic local refinement starting from the incumbent best.
        if local_opt_every is not None and gen % local_opt_every == 0:
            if local_optimizer is not None:
                res = local_optimizer(func, g_best, lb, ub)
            elif callable(func_and_grad):
                res = minimize(func_and_grad, g_best, args=args, method="L-BFGS-B",
                               jac=True, bounds=np.stack((lb, ub), axis=1),
                               options={"maxiter": 50})
            elif callable(grad_func):
                def _fg(x): return func(x), grad_func(x)
                res = minimize(_fg, g_best, args=args, method="L-BFGS-B",
                               jac=True, bounds=np.stack((lb, ub), axis=1),
                               options={"maxiter": 50})
            else:
                # No gradient available: let scipy pick a bounded method.
                res = minimize(func, g_best, args=args,
                               bounds=np.stack((lb, ub), axis=1),
                               options={"maxiter":50})
            # Accept the refined point only if feasible and strictly better.
            if is_feasible(res.x) and res.fun < f_best:
                g_best = res.x.copy()
                f_best = res.fun
                prev_g_best = g_best.copy()
            if debug:
                # NOTE(review): assumes `res` exposes nit/nfev — true for scipy
                # results; a custom local_optimizer must provide them too.
                print(f"Local refinement at gen {gen}: nit={res.nit}, nfev={res.nfev}, "
                      f"f={res.fun:.6e}, success={res.success}, message={res.message}")

        # Debug print
        if debug:
            print(f"Gen {gen}: best f={f_best}")

        # Check stagnation stopping criterion
        if f_best < f_prev_gen:
            stagnation_count = 0
        else:
            stagnation_count += 1

        if stagnation_count >= stagnation_limit:
            if debug:
                print(f"Stopping: No improvement for {stagnation_limit} generations at gen {gen}")
            break

    return g_best, f_best

1399 

def pso(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={},
        pop_size=100, omega=0.5, phip=0.5, phig=0.5, n_generations=100,
        minstep=1e-6, minfunc=1e-6, debug=False, seed=42,
        local_opt_every=15, initial_positions=None, local_optimizer=None,
        func_and_grad=None, grad_func=None):
    """
    Particle Swarm Optimization with periodic local refinement
    R. C. Eberhart, Y. Shi and J. Kennedy, Swarm Intelligence, CA, San Mateo:Morgan Kaufmann, 2001.
    https://theswissbay.ch/pdf/Gentoomen%20Library/Artificial%20Intelligence/Swarm%20Intelligence/Swarm%20intelligence%20-%20James%20Kennedy.pdf

    Parameters:
    -----------
    func : callable
        Objective function to minimize
    lb : array_like
        Lower bounds for variables
    ub : array_like
        Upper bounds for variables
    ieqcons : list, optional
        List of inequality constraint functions (>= 0 when feasible)
    f_ieqcons : callable, optional
        Function returning array of inequality constraints
    args : tuple, optional
        Extra arguments passed to objective function
    kwargs : dict, optional
        Extra keyword arguments passed to objective function
    pop_size : int, optional
        Number of particles in swarm
    omega : float, optional
        Inertia weight
    phip : float, optional
        Personal best weight
    phig : float, optional
        Global best weight
    n_generations : int, optional
        Maximum number of iterations
    minstep : float, optional
        Minimum step size for convergence
    minfunc : float, optional
        Minimum function improvement for convergence
    debug : bool, optional
        Whether to print debug information
    seed : int, optional
        Random seed for reproducibility
    local_opt_every : int, optional
        Frequency of local optimization (every N iterations); None disables
    initial_positions : array-like or None, optional
        Positions used to seed the front of the swarm
    local_optimizer : callable or None, optional
        Custom local optimizer called as local_optimizer(func, x0, lb, ub)
    func_and_grad : callable or None, optional
        Function returning (f, grad) for jac=True L-BFGS-B refinement
    grad_func : callable or None, optional
        Gradient-only function combined with func when func_and_grad is None

    Returns:
    --------
    best_position : ndarray
        Best position found (None if no feasible point was ever seen)
    best_value : float
        Best function value found
    """

    # Input validation
    lb = np.asarray(lb)
    ub = np.asarray(ub)
    D = len(lb)

    assert len(lb) == len(
        ub), "Lower- and upper-bounds must be the same length"
    assert np.all(
        ub > lb), "All upper-bound values must be greater than lower-bound values"
    assert callable(func), "Invalid function handle"

    # Set random seed (legacy global NumPy RNG).
    np.random.seed(seed)

    # Velocity bounds: one full box width per step at most.
    vhigh = np.abs(ub - lb)
    vlow = -vhigh

    # Constraint setup
    if f_ieqcons is not None:
        def cons(x):
            return np.asarray(f_ieqcons(x, *args, **kwargs))
    elif ieqcons:
        def cons(x):
            return np.asarray([c(x, *args, **kwargs) for c in ieqcons])
    else:
        def cons(x):
            return np.array([0.0])

    def is_feasible(x):
        # Feasible means every constraint value is non-negative.
        return np.all(cons(x) >= 0)

    # Initialize swarm using Sobol sequence
    sampler = qmc.Sobol(d=D, scramble=True, seed=seed)
    sobol_sample = sampler.random_base2(m=int(np.ceil(np.log2(pop_size))))
    x = qmc.scale(sobol_sample[:pop_size], lb, ub)
    if initial_positions is not None:
        n_init = min(len(initial_positions), pop_size)
        x[:n_init] = initial_positions[:n_init]
    # Initialize velocities and personal bests
    v = vlow + np.random.rand(pop_size, D) * (vhigh - vlow)
    p = np.copy(x)
    fp = np.array([func(p[i], *args, **kwargs) for i in range(pop_size)])
    feasibles = np.array([is_feasible(p[i]) for i in range(pop_size)])

    # Initialize global best
    fg = np.inf
    g = None

    for i in range(pop_size):
        if feasibles[i] and fp[i] < fg:
            fg = fp[i]
            g = p[i].copy()

    # NOTE(review): if no initial particle is feasible, `g` stays None and the
    # velocity update below (`g - x`) raises TypeError on the first iteration —
    # confirm whether callers guarantee a feasible start.

    # Main PSO loop
    it = 1
    while it <= n_generations:
        # Update velocities and positions (standard inertia + cognitive +
        # social terms), clipping positions back into the box.
        rp = np.random.rand(pop_size, D)
        rg = np.random.rand(pop_size, D)
        v = omega * v + phip * rp * (p - x) + phig * rg * (g - x)
        x = np.clip(x + v, lb, ub)

        # Evaluate particles and update personal/global bests
        for i in range(pop_size):
            fx = func(x[i], *args, **kwargs)

            if fx < fp[i] and is_feasible(x[i]):
                p[i] = x[i].copy()
                fp[i] = fx

                if fx < fg:
                    stepsize = np.linalg.norm(g - x[i])

                    if debug:
                        print(
                            f'New best for swarm at iteration {it}: {x[i]} {fx}')

                    # Check convergence criteria
                    if np.abs(fg - fx) <= minfunc:
                        print(f'Stopping: Objective improvement < {minfunc}')
                        return x[i], fx
                    if stepsize <= minstep:
                        print(f'Stopping: Position change < {minstep}')
                        return x[i], fx

                    g = x[i].copy()
                    fg = fx

        # Periodic local refinement
        if local_opt_every is None:
            pass
        elif it % local_opt_every == 0:
            if local_optimizer is not None:
                local_res = local_optimizer(func, g, lb, ub)
            elif callable(func_and_grad):
                bounds = list(zip(lb, ub))
                local_res = minimize(func_and_grad, g, args=args, method="L-BFGS-B",
                                     jac=True, bounds=bounds,
                                     options={"maxiter": 20})
                # Tag the result so should_accept_local_result can read the flag.
                if not hasattr(local_res, 'recovered_from_abnormal'):
                    local_res.recovered_from_abnormal = False
            elif callable(grad_func):
                bounds = list(zip(lb, ub))
                def _fg(x): return func(x), grad_func(x)
                local_res = minimize(_fg, g, args=args, method="L-BFGS-B",
                                     jac=True, bounds=bounds,
                                     options={"maxiter": 20})
                if not hasattr(local_res, 'recovered_from_abnormal'):
                    local_res.recovered_from_abnormal = False
            else:
                local_res = robust_local_optimization(
                    func, g, args=args, lb=lb, ub=ub, debug=False
                )

            if should_accept_local_result(local_res, fg, is_feasible, debug):
                if debug:
                    print(
                        "Gradient refinement improved")
                g = local_res.x.copy()
                fg = local_res.fun

        if debug:
            print(f'Best after iteration {it}: {g} {fg}')
        it += 1

    # Final checks
    print(f'Stopping: maximum iterations reached --> {n_generations}')
    if g is not None and not is_feasible(g):
        print("Warning: Optimization finished without a feasible solution.")

    return g, fg

1586 

1587 

def ecl_acquisition(mu_N, var_N, threshold=0.0):
    """
    Entropy Contour Learning (ECL) acquisition function.

    Computes the binary entropy of the probability that the latent function
    exceeds `threshold`, given a Gaussian posterior with mean `mu_N` and
    variance `var_N`:

        ECL(x) = -p log p - (1 - p) log(1 - p),
        p = P(Y(x) > T) = 1 - Phi((mu_N - T) / sigma_N)

    Points where the safe/fail classification is most uncertain (p ~ 0.5)
    receive the highest score.

    Parameters:
    -----------
    mu_N : float or array-like
        Posterior mean(s) at the candidate point(s).
    var_N : float or array-like
        Posterior variance(s) at the candidate point(s).
    threshold : float, default=0.0
        Threshold T for the affine limit state function g(Y(x)) = Y(x) - T.
        Default is 0.0, meaning failure region is defined by Y(x) <= 0.

    Returns:
    --------
    ecl_values : float or ndarray
        ECL acquisition values (higher values indicate more informative
        points); a scalar for a single point, an array otherwise.

    Examples:
    ---------
    >>> ecl_vals = ecl_acquisition(mu, var, threshold=0.0)
    >>> next_point_idx = np.argmax(ecl_vals)
    """
    # Generalization: accept Python scalars and lists as well as ndarrays
    # (the previous version required `.ndim` and failed on plain floats).
    mu_N = np.atleast_1d(np.asarray(mu_N, dtype=float))
    var_N = np.atleast_1d(np.asarray(var_N, dtype=float))

    # Guard against zero/negative variance before the sqrt and the division.
    sigma_N = np.sqrt(np.maximum(var_N, 1e-15))
    sigma_N = np.maximum(sigma_N, 1e-10)

    # Standardized values: (mu_N(x) - T) / sigma_N(x)
    z = (mu_N - threshold) / sigma_N

    # P(g(Y(x)) > 0) = P(Y(x) > T) = 1 - Phi(z)
    p_safe = 1 - norm.cdf(z)

    # Binary entropy H(p); clip probabilities for numerical stability.
    p_safe = np.clip(p_safe, 1e-15, 1 - 1e-15)
    ecl_values = -p_safe * np.log(p_safe) - (1 - p_safe) * np.log(1 - p_safe)

    # Return scalar if single point, array otherwise
    return ecl_values[0] if len(ecl_values) == 1 else ecl_values

1649 

1650 

def ecl_batch_acquisition(gp_model, X, rays_predict, params, threshold=0.0, batch_size=1):
    """
    Batch ECL acquisition function that selects multiple points simultaneously.

    Uses a greedy approach to select the next batch_size points that maximize
    the ECL criterion while maintaining diversity.

    Parameters:
    -----------
    gp_model : gddegp
        Trained gddegp model instance
    X : array-like, shape (n_candidates, n_features)
        Candidate points to select from
    rays_predict : ndarray
        Ray directions for prediction
    params : ndarray
        Hyperparameters for gddegp prediction
    threshold : float, default=0.0
        Threshold for the limit state function
    batch_size : int, default=1
        Number of points to select in the batch

    Returns:
    --------
    selected_indices : array-like, shape (batch_size,)
        Indices of selected points from X
    selected_points : array-like, shape (batch_size, n_features)
        The selected points

    Examples:
    ---------
    >>> indices, points = ecl_batch_acquisition(gp_model, candidates, rays, params, batch_size=3)
    >>> next_experiments = points
    """
    X = np.atleast_2d(X)
    n_candidates = X.shape[0]

    # Asking for at least as many points as exist: return everything.
    if batch_size >= n_candidates:
        return np.arange(n_candidates), X

    selected_indices = []
    remaining_indices = list(range(n_candidates))

    for _ in range(batch_size):
        if not remaining_indices:
            break

        # Evaluate ECL for remaining candidates
        remaining_X = X[remaining_indices]
        # FIXME(review): `ecl_acquisition` in this module takes
        # (mu_N, var_N, threshold) but is called here with five positional
        # arguments (gp_model, X, rays_predict, params, threshold). This raises
        # TypeError whenever this branch executes; the GP posterior mean and
        # variance must be predicted first and passed instead.
        ecl_values = ecl_acquisition(
            gp_model, remaining_X, rays_predict, params, threshold)

        # Select point with highest ECL value.
        # NOTE(review): the scores are recomputed identically each iteration
        # (no model update after a pick), so despite the docstring this greedy
        # loop reduces to top-k selection with no diversity term — confirm
        # whether that is intended.
        best_local_idx = np.argmax(ecl_values)
        best_global_idx = remaining_indices[best_local_idx]

        selected_indices.append(best_global_idx)
        remaining_indices.remove(best_global_idx)

    selected_indices = np.array(selected_indices)
    selected_points = X[selected_indices]

    return selected_indices, selected_points

1714 

1715 

def get_surrogate_gradient_ray(gp, x, params, fallback_axis=0, normalize=True, threshold=0.0):
    """
    Return a surrogate gradient direction (d x 1 column vector) at `x` using
    the GP model's predicted directional derivatives (any input dimension).

    The gradient is assembled component-by-component by querying the GP for
    the directional derivative along each standard basis vector. If the GP
    mean at `x` exceeds `threshold`, the direction is negated so it points
    back toward the threshold.

    Parameters
    ----------
    gp : object
        Trained GP model instance with a
        `.predict(x, ray, params, calc_cov, return_deriv)` method.
    x : array-like, shape (1, d)
        The input location where to compute the surrogate gradient direction.
    params : array-like
        GP hyperparameters.
    fallback_axis : int, default=0
        Axis of the unit vector returned when the gradient norm is ~0 and
        `normalize` is True.
    threshold : float, default=0.0
        Mean value above which the gradient sign is flipped.
    normalize : bool, default=True
        If True, return a unit vector; else, return the raw signed gradient.

    Returns
    -------
    ndarray, shape (d, 1)
        The chosen direction (unit vector when `normalize`, otherwise the
        signed gradient). Bug fix: the previous docstring incorrectly
        advertised a (ray, grad) tuple — a single array is returned.
    """
    x = np.atleast_2d(x)
    d = x.shape[1]
    grad = np.zeros((d, 1))

    # Query one directional derivative per coordinate axis.
    for i in range(d):
        basis = np.zeros((d, 1))
        basis[i, 0] = 1.0
        mu = gp.predict(x, basis, params, calc_cov=False, return_deriv=True)
        grad[i, 0] = mu[1]  # Assumes mu[0]=f(x), mu[1]=first derivative

    # Mean prediction at x (the direction argument is irrelevant when
    # return_deriv=False, so any axis works).
    basis0 = np.zeros((d, 1))
    basis0[0, 0] = 1.0
    mu_mean = gp.predict(x, basis0, params, calc_cov=False,
                         return_deriv=False)[0]

    # Flip sign if above threshold
    if mu_mean > threshold:
        grad = -grad

    # Bug fix: renamed from `norm` to avoid shadowing scipy.stats.norm
    # imported at module level.
    grad_norm = np.linalg.norm(grad)
    if not normalize:
        return grad.reshape(-1, 1)
    if grad_norm < 1e-16:
        # Degenerate gradient: fall back to a fixed coordinate direction.
        fallback = np.zeros((d, 1))
        fallback[fallback_axis, 0] = 1.0
        return fallback.reshape(-1, 1)
    return (grad / grad_norm).reshape(-1, 1)

1775 

1776 

def finite_difference_gradient(gp, x, params, h=1e-5):
    """
    Central finite-difference estimate of the GP posterior-mean gradient.

    Parameters
    ----------
    gp : object
        Trained GP model exposing ``predict``.
    x : ndarray, shape (1, d)
        Location at which the gradient is approximated.
    params : array-like
        GP hyperparameters forwarded to ``gp.predict``.
    h : float
        Finite-difference step size.

    Returns
    -------
    grad_fd : ndarray, shape (d, 1)
        Central finite difference gradient estimate.
    """
    point = np.atleast_2d(x)
    dim = point.shape[1]

    # The ray is irrelevant when return_deriv=False; any unit vector works.
    probe_ray = np.zeros((dim, 1))
    probe_ray[0, 0] = 1.0

    estimate = np.zeros((dim, 1))
    for axis in range(dim):
        step = np.zeros_like(point)
        step[0, axis] = h

        forward = gp.predict(point + step, probe_ray, params,
                             calc_cov=False, return_deriv=False)
        backward = gp.predict(point - step, probe_ray, params,
                              calc_cov=False, return_deriv=False)
        estimate[axis, 0] = (forward[0] - backward[0]) / (2 * h)
    return estimate

1815 

1816 

def check_gp_gradient(gp, x, params, h=1e-5, fallback_axis=0):
    """
    Sanity-check the GP-predicted gradient against a finite-difference one.

    Prints both gradients together with their absolute and relative errors,
    then returns the pair (surrogate gradient, finite-difference gradient),
    each of shape (d, 1).
    """
    surrogate = get_surrogate_gradient_ray(
        gp, x, params, fallback_axis=fallback_axis, normalize=False)
    numeric = finite_difference_gradient(gp, x, params, h=h)

    sur_flat = surrogate.flatten()
    num_flat = numeric.flatten()
    abs_err = np.abs(sur_flat - num_flat)

    print("Surrogate GP gradient:\n", sur_flat)
    print("Finite-difference gradient:\n", num_flat)
    print("Absolute error:\n", abs_err)
    print("Relative error:\n", abs_err / (np.abs(num_flat) + 1e-15))

    return surrogate, numeric

1835 

1836 

def get_entropy_ridge_direction_nd(gp, x, params, threshold=0.0, h=1e-5,
                                   fallback_axis=0, normalize=True, random_dir=False, seed=None):
    """
    Get a direction tangent to the entropy level set ("ridge direction") at x.

    The entropy (ECL acquisition) gradient is estimated by central finite
    differences, and a tangent direction is drawn from the null space of
    that gradient.

    Parameters
    ----------
    gp : object
        Trained GP model instance with .predict
    x : array-like, shape (1, d)
        The input location
    params : array-like
        GP hyperparameters
    threshold : float
        ECL threshold
    h : float
        Finite difference step
    fallback_axis : int
        Axis used when no tangent direction exists (e.g. d == 1)
    normalize : bool
        Normalize output direction
    random_dir : bool
        If True, return a random direction in the ridge (level set).
        If False, return the first tangent-basis vector (deterministic).
    seed : int or None
        For reproducible random direction

    Returns
    -------
    ridge_dir : ndarray, shape (d, 1)
        Ridge direction (tangent to the entropy level set) at x
    """
    # Absolute package import: this module is jetgp/utils.py, so a bare
    # ``from utils import ...`` only works when the CWD happens to be on
    # sys.path. Matches the ``jetgp.``-prefixed imports at the top of file.
    from jetgp.utils import ecl_acquisition

    x = np.atleast_2d(x)
    d = x.shape[1]

    def entropy_func(x0):
        # ECL entropy at x0. The ray is irrelevant for a mean/variance
        # prediction (return_deriv=False), so e_0 is used throughout.
        x0 = np.atleast_2d(x0)
        ray0 = np.zeros((d, 1))
        ray0[0, 0] = 1.0
        mu, var = gp.predict(
            x0, ray0, params, calc_cov=True, return_deriv=False)
        return float(ecl_acquisition(mu, var, threshold=threshold))

    # Central finite differences of the entropy w.r.t. each coordinate.
    grad_H = np.zeros((d, 1))
    for i in range(d):
        dx = np.zeros_like(x)
        dx[0, i] = h
        grad_H[i, 0] = (entropy_func(x + dx) - entropy_func(x - dx)) / (2 * h)

    # Orthonormal basis for the tangent space of the level set at x.
    # null_space returns shape (d, k): each column is a basis vector
    # (k == d - 1 for a non-zero gradient; k == d for a zero gradient).
    basis = null_space(grad_H.T)

    if basis.shape[1] == 0:
        # No tangent direction exists (e.g. d == 1 with a non-zero gradient).
        # Previously this raised IndexError on basis[:, 0]; fall back to a
        # coordinate axis instead.
        fallback = np.zeros((d, 1))
        fallback[fallback_axis, 0] = 1.0
        return fallback

    # Select a direction from the tangent space.
    if random_dir:
        rng = np.random.default_rng(seed)
        coeffs = rng.standard_normal(basis.shape[1])
        ridge_dir = basis @ coeffs
    else:
        # Deterministic choice: first basis vector.
        ridge_dir = basis[:, 0]

    if normalize:
        ridge_dir = ridge_dir / np.linalg.norm(ridge_dir)
    return ridge_dir.reshape(-1, 1)

1920 

1921 

def get_entropy_ridge_direction_nd_2(gp, x, params, threshold=0.0, h=1e-5,
                                     fallback_axis=0, normalize=True, random_dir=False, seed=None):
    """
    Get a direction tangent to the surrogate-gradient level set at x,
    choosing the tangent-basis vector along which the GP's second
    directional derivative is smallest in magnitude (i.e. the "flattest"
    ridge direction).

    Parameters
    ----------
    gp : object
        Trained GP model instance with .predict and .n_order
    x : array-like, shape (1, d)
        The input location
    params : array-like
        GP hyperparameters
    threshold : float
        Threshold forwarded to get_surrogate_gradient_ray for its sign flip
    h : float
        Finite difference step for the second-derivative fallback
    fallback_axis : int
        Axis used when no tangent direction exists (e.g. d == 1)
    normalize : bool
        Unused here; null-space basis vectors are already unit norm
    random_dir : bool
        Unused here; the flattest basis vector is chosen deterministically
    seed : int or None
        Unused here

    Returns
    -------
    best_dir : ndarray, shape (d, 1)
        Tangent direction with the smallest |second directional derivative|
    """
    # Forward the caller's settings. Previously threshold/fallback_axis were
    # hard-coded to 0.0/0, silently ignoring the function's own arguments.
    grad_H = get_surrogate_gradient_ray(
        gp, x, params, fallback_axis=fallback_axis, normalize=True,
        threshold=threshold)

    d = grad_H.shape[0]

    # Orthonormal basis for the null space (tangent space to the level set).
    # null_space returns a (d, k) matrix: each column is a basis vector.
    basis = null_space(grad_H.T)

    if basis.shape[1] == 0:
        # No tangent direction exists (e.g. d == 1). Previously this
        # returned None; fall back to a coordinate axis instead.
        fallback = np.zeros((d, 1))
        fallback[fallback_axis, 0] = 1.0
        return fallback

    best_grad = np.inf
    best_dir = None
    for idx in range(basis.shape[1]):
        v = basis[:, idx].reshape(-1, 1)
        if gp.n_order >= 2:
            # Second directional derivative available directly from the GP.
            mu = gp.predict(x, v, params, calc_cov=False, return_deriv=True)
            second_deriv = mu[2]
        else:
            # Central finite difference of the GP's first directional
            # derivative along v.
            x_fwd = x + h * v.T
            x_bwd = x - h * v.T
            mu_fwd = gp.predict(
                x_fwd, v, params, calc_cov=False, return_deriv=True)
            mu_bwd = gp.predict(
                x_bwd, v, params, calc_cov=False, return_deriv=True)
            # mu_fwd[1]/mu_bwd[1] are first directional derivatives at the
            # shifted positions.
            second_deriv = (mu_fwd[1] - mu_bwd[1]) / (2 * h)
        if abs(second_deriv) < best_grad:
            best_grad = abs(second_deriv)
            best_dir = v

    return best_dir

1995 

1996 

def sobol_points(n_points, box, seed=0):
    """
    Draw ``n_points`` scrambled Sobol samples inside an axis-aligned box.

    Parameters
    ----------
    n_points : int
        Number of samples to return.
    box : sequence of (lo, hi) pairs
        Per-dimension bounds; its length determines the dimension.
    seed : int
        Seed for the scrambled Sobol engine.

    Returns
    -------
    ndarray, shape (n_points, len(box))
        Samples scaled into the box.
    """
    dim = len(box)
    engine = qmc.Sobol(d=dim, scramble=True, seed=seed)
    # Sobol sequences balance best at power-of-two sizes: draw
    # 2**m >= n_points points and keep the first n_points rows.
    exponent = int(np.ceil(np.log2(n_points)))
    unit_pts = engine.random_base2(m=exponent)[:n_points]
    lows = np.array([lo for lo, _ in box])
    highs = np.array([hi for _, hi in box])
    return lows + (highs - lows) * unit_pts

2005 

2006 

def local_box_around_point(x_next, delta):
    """
    Build a per-dimension (lo, hi) box of half-width ``delta`` centered
    at ``x_next`` (accepts shape (1, d) or (d,)).
    """
    center = np.atleast_2d(x_next).reshape(-1)
    return [(float(c - delta), float(c + delta)) for c in center]

2012 

2013 

2014# @profile 

2015# def maximize_ier_direction( 

2016# gp, x_next, x_train, y_blocks, rays_array, params, box, threshold=0.0, n_integration=500, seed=123, delta=.5 

2017# ): 

2018# """ 

2019# Maximizes Integrated Entropy Reduction (IER) at x_next over directions v (unit norm). 

2020# """ 

2021# d = x_next.shape[1] 

2022# # 1. Integration points for Monte Carlo estimate 

2023# local_box = local_box_around_point(x_next, delta) 

2024# integration_points = sobol_points(n_integration, local_box, seed) 

2025# # Use a default ray for integration points (e.g., all along first axis) 

2026# ray0 = np.zeros((d, 1)) 

2027# ray0[0, 0] = 1.0 

2028# integration_rays = np.tile(ray0, n_integration) 

2029 

2030# # 2. Get current mean/variance at integration points 

2031# mu_before, var_before = gp.predict( 

2032# integration_points, integration_rays, params, calc_cov=True, return_deriv=False 

2033# ) 

2034# from utils import ecl_acquisition 

2035# ecl_before = ecl_acquisition(mu_before, var_before, threshold=threshold) 

2036 

2037# @profile 

2038# def negative_ier(w): 

2039# v = w.reshape(-1, 1) 

2040# norm_v = np.linalg.norm(v) 

2041# if norm_v < 1e-12: 

2042# return 1e6 

2043# v = v / norm_v 

2044 

2045# Y_blocks = [y.copy() for y in y_blocks] 

2046# # --- Normalize if needed --- 

2047# v_ = v 

2048# integration_rays_ = integration_rays 

2049# # utils.check_gp_gradient(gp, x_next, previous_params) 

2050# # Hypercomplex construction for new point 

2051 

2052# # Compute GP predictions (function value and derivatives) 

2053# y_pred = gp.predict(x_next, v_, params, 

2054# calc_cov=False, return_deriv=True).ravel() 

2055 

2056# # How many derivatives do you want to include? 

2057# n_order = gp.n_order 

2058 

2059# # Rebuild y_blocks_next to include the GP predictions 

2060# Y_blocks_next = [] 

2061 

2062# # Append the function prediction 

2063# Y_blocks_next.append(y_pred[0].reshape(-1, 1)) 

2064 

2065# # Append derivatives in order (1st, 2nd, ..., n_order) 

2066# for i in range(1, n_order + 1): 

2067# Y_blocks_next.append(y_pred[i].reshape(-1, 1)) 

2068 

2069# # Augment training set 

2070# X_train = np.vstack([x_train, x_next]) 

2071# for k in range(len(y_blocks)): 

2072# Y_blocks[k] = np.vstack([Y_blocks[k], Y_blocks_next[k]]) 

2073# rays_array_tmp = np.concatenate([rays_array, v_], axis=1) 

2074 

2075# gp_temp = gddegp.gddegp(X_train, Y_blocks, 

2076# n_order=gp.n_order, 

2077# rays_array=rays_array_tmp, 

2078# normalize=True, 

2079# kernel="SE", 

2080# kernel_type="anisotropic",) 

2081 

2082# mu, var_after = gp_temp.predict( 

2083# integration_points, integration_rays_, params, calc_cov=True, return_deriv=False) 

2084# ecl_after = ecl_acquisition( 

2085# mu, var_after, threshold=threshold) 

2086# ier = np.mean(ecl_before - ecl_after) 

2087# return -ier 

2088# # 4. Optimize using L-BFGS-B, with a couple random restarts for robustness 

2089 

2090# D = d # dimension of direction 

2091# lb = np.full(D, -2.0) 

2092# ub = np.full(D, 2.0) 

2093# # Optionally, can try [-1, 1] or any sufficiently wide box (PSO will normalize inside objective) 

2094 

2095# best_w, best_val = pso( 

2096# negative_ier, lb, ub, 

2097# pop_size=10*d, maxiter=11, debug=True, seed=seed + 77 

2098# ) 

2099# v_opt = best_w.reshape(-1, 1) 

2100# v_opt /= np.linalg.norm(v_opt) 

2101 

2102# return v_opt 

2103 

2104 

2105# utils.py 

2106 

2107 

2108# UPDATED function to use the new bounds logic 

2109# def find_next_point( 

2110# gp, 

2111# params, 

2112# x_train, 

2113# y_train_list, 

2114# dist_params, 

2115# acquisition_func, 

2116# integration_points = None, 

2117# n_candidate_points: int = 1024, 

2118# n_local_starts: int = 10, 

2119# use_agg_al = False, 

2120# var_hist = [], 

2121# **acq_kwargs 

2122# ): 

2123# """Finds the next point using distribution-aware optimization bounds.""" 

2124 

2125# if not use_agg_al: 

2126# # --- Stage 1: Coarse Global Search --- 

2127# shape, scale = get_pdf_params(dist_params) 

2128 

2129# sampler = Sobol(d=x_train.shape[1], scramble=True) 

2130# coarse_points_unit = sampler.random(n=n_candidate_points) 

2131# candidate_points = get_inverse(dist_params, coarse_points_unit 

2132# ) 

2133 

2134# candidate_scores = acquisition_func( 

2135# gp, params, x_train, y_train_list, 

2136# candidate_points, integration_points = integration_points, normalize=gp.normalize 

2137# ) 

2138 

2139# top_indices = np.argsort(candidate_scores.ravel())[-n_local_starts:] 

2140# top_starting_points = candidate_points[top_indices] 

2141 

2142# # --- Stage 2: Local Optimization --- 

2143 

2144# # Get the correct bounds for the optimizer 

2145# bounds = get_optimization_bounds(dist_params) 

2146# all_unbounded = all(b[0] is None for b in bounds) 

2147 

2148# def objective_function(x: np.ndarray) -> float: 

2149# x_candidate = x.reshape(1, -1) 

2150# score = acquisition_func( 

2151# gp, params, x_train, y_train_list, 

2152# x_candidate, integration_points = integration_points, normalize=gp.normalize 

2153# ) 

2154# return -score[0] 

2155 

2156# best_x = None 

2157# best_score = -np.inf 

2158 

2159# for start_point in top_starting_points: 

2160# if all_unbounded: 

2161# res = minimize(fun=objective_function, x0=start_point, method="BFGS") 

2162# else: 

2163# res = minimize(fun=objective_function, x0=start_point, method="L-BFGS-B", bounds=bounds) 

2164 

2165# if -res.fun > best_score: 

2166# best_score = -res.fun 

2167# best_x = res.x 

2168 

2169# return best_x.reshape(1, -1), best_score 

2170# else: 

2171# agg_variance = acquisition_func(var_hist) 

2172# max_index = np.argmax(agg_variance) 

2173# return integration_points[max_index,:], agg_variance[0,max_index] 

2174 

2175def _ensure_2d(x): 

2176 x = np.asarray(x) 

2177 if x.ndim == 1: 

2178 return x.reshape(1, -1) 

2179 return x 

2180 

2181def _format_y_to_add(y_pred): 

2182 """Convert gp.predict output at one x to list of 2D arrays for y_train_list append.""" 

2183 if isinstance(y_pred, (list, tuple)): 

2184 return [np.atleast_2d(np.asarray(e)) for e in y_pred] 

2185 else: 

2186 return [np.atleast_2d(np.asarray(y_pred))] 

2187 

2188 

2189# def sample_from_dist(dist, means, var, shape, scale, size): 

2190# num_cols = len(dist) 

2191# num_rows = int(size) 

2192# samples = np.zeros((num_rows, num_cols)) 

2193# for i in range(0, num_cols): 

2194# if dist[i] == 'N': # normal (Gaussian) 

2195# samples[:, i] = stats.norm.rvs( 

2196# loc=shape[i], scale=scale[i], size=size) 

2197# elif dist[i] == 'U': # uniform 

2198# samples[:, i] = stats.uniform.rvs( 

2199# loc=shape[i], scale=scale[i], size=size) 

2200# elif dist[i] == 'LN': # log-normal 

2201# samples[:, i] = stats.lognorm.rvs( 

2202# scale[i], scale=np.exp(shape[i]), size=size) 

2203# # elif dist[i] == 'B': # beta 

2204# # frozen_dist = stats.beta(a=shape[i], b=scale[i]) 

2205# # transformed_samples[:, i] = frozen_dist.ppf(samples[:, i]) 

2206# elif dist[i] == 'T': # triangle (symmetric) 

2207# c = .5 

2208# samples[:, i] = stats.triang.rvs( 

2209# c, loc=shape[i], scale=scale[i], size=size) 

2210# elif dist[i] == 'UTN': 

2211# # samples[:, i] = stats.uniform.rvs( 

2212# # loc=shape[i], scale=scale[i] - shape[i], size=size) 

2213# cf = 3 

2214# # lb = shape[i] + cf*shape[i] 

2215# # ub = scale[i] - cf*shape[i] 

2216# # transformed_samples[:, i] = ( 

2217# # ub - lb)*samples[:, i] + lb 

2218# a, b = (shape[i] - means[i]) / \ 

2219# (cf*var[i]*means[i]), (scale[i] - 

2220# means[i]) / (cf*var[i]*means[i]) 

2221# samples[:, i] = stats.truncnorm.rvs( 

2222# a, b, loc=means[i], scale=cf*var[i]*means[i], size=size) 

2223 

2224# return samples 

def get_pdf_params(dist_params):
    """
    Compute scipy-style loc/scale parameters for each marginal distribution.

    Explicit 'lower_bounds'/'upper_bounds' entries take precedence for the
    bounded families ('U', 'TN'); otherwise loc/scale are derived from the
    'means' and 'vars' entries.

    Returns
    -------
    (ndarray, ndarray)
        Per-dimension loc and scale arrays.
    """
    dists = dist_params['dists']
    n_var = len(dists)
    locs = np.zeros(n_var)
    scales = np.zeros(n_var)

    explicit = 'lower_bounds' in dist_params and 'upper_bounds' in dist_params

    for i, name in enumerate(dists):
        if explicit and name in ('U', 'TN'):
            # Explicit bounds win for the bounded families.
            lb = dist_params['lower_bounds'][i]
            ub = dist_params['upper_bounds'][i]
            if name == 'U':
                locs[i] = lb            # loc = lower bound
                scales[i] = ub - lb     # scale = width
            else:  # 'TN'
                locs[i] = dist_params['means'][i]
                scales[i] = np.sqrt(dist_params['vars'][i])
        else:
            # Moment-based fallback when no bounds were supplied.
            stdev = np.sqrt(dist_params['vars'][i])
            mean = dist_params['means'][i]
            if name == 'U':
                locs[i] = mean - np.sqrt(3) * stdev
                scales[i] = 2 * np.sqrt(3) * stdev
            elif name == 'N':
                locs[i] = mean
                scales[i] = stdev
            # Other families currently fall through with loc = scale = 0.

    return locs, scales

2262 

def get_optimization_bounds(dist_params):
    """
    Build per-dimension (lower, upper) optimization bounds.

    Explicit 'lower_bounds'/'upper_bounds' win for the bounded families
    ('U', 'TN', 'B'); otherwise bounds are derived from moments
    ('U': mean +/- sqrt(3)*std, 'TN': mean +/- 3*std). Unbounded families
    ('N', 'LN') map to (None, None).
    """
    explicit = 'lower_bounds' in dist_params and 'upper_bounds' in dist_params
    out = []

    for i, name in enumerate(dist_params['dists']):
        if explicit and name in ('U', 'TN', 'B'):
            # Explicit bounds take priority.
            out.append((dist_params['lower_bounds'][i],
                        dist_params['upper_bounds'][i]))
        elif name == 'B':
            # Beta lives on [0, 1] by default.
            out.append((0, 1))
        elif name in ('N', 'LN'):
            # Unbounded support.
            out.append((None, None))
        else:
            # Moment-based fallback for U/TN without explicit bounds.
            stdev = np.sqrt(dist_params['vars'][i])
            mean = dist_params['means'][i]
            if name == 'U':
                out.append((mean - np.sqrt(3) * stdev,
                            mean + np.sqrt(3) * stdev))
            elif name == 'TN':
                out.append((mean - 3 * stdev, mean + 3 * stdev))
            else:
                out.append((None, None))
    return out

2294 

def get_inverse(dist_params, samples):
    """
    Transform uniform [0, 1] samples to the target marginals via inverse CDF.

    Parameters
    ----------
    dist_params : dict
        Must contain 'dists'; 'means'/'vars' and (optionally)
        'lower_bounds'/'upper_bounds' as required by the marginals.
    samples : ndarray, shape (n, d)
        Uniform samples in the unit hypercube, one column per dimension.

    Returns
    -------
    ndarray, shape (n, d)
        Transformed samples.

    Raises
    ------
    ValueError
        If a distribution code other than 'U', 'N', or 'TN' is encountered.
    """
    dists = dist_params['dists']
    loc_params, scale_params = get_pdf_params(dist_params)
    has_bounds = 'lower_bounds' in dist_params and 'upper_bounds' in dist_params

    transformed_samples = np.zeros(samples.shape)
    for i in range(samples.shape[1]):
        dist_name = dists[i]
        loc = loc_params[i]
        scale = scale_params[i]

        if dist_name == 'TN':
            mean = dist_params['means'][i]
            stdev = np.sqrt(dist_params['vars'][i])
            if has_bounds:
                lb = dist_params['lower_bounds'][i]
                ub = dist_params['upper_bounds'][i]
            else:  # Fallback: truncate at +/- 3 standard deviations
                lb = mean - 3 * stdev
                ub = mean + 3 * stdev

            # truncnorm takes standardized truncation limits.
            a, b = (lb - mean) / stdev, (ub - mean) / stdev
            frozen_dist = stats.truncnorm(a, b, loc=mean, scale=stdev)

        elif dist_name == 'U':
            frozen_dist = stats.uniform(loc=loc, scale=scale)

        elif dist_name == 'N':
            frozen_dist = stats.norm(loc=loc, scale=scale)

        else:
            # Previously an unknown code silently reused the previous
            # column's frozen distribution (or raised NameError on the
            # first column); fail loudly instead.
            raise ValueError(f"Unsupported distribution code: {dist_name!r}")

        transformed_samples[:, i] = frozen_dist.ppf(samples[:, i])
    return transformed_samples

2333 

2334# (The find_next_point function does not need to be changed from the previous version)