Coverage for jetgp/utils.py: 51%

697 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2026-04-03 15:14 -0500

1import numpy as np 

2import pyoti.core as coti 

3import pyoti.sparse as oti 

4import sympy as sp 

5from scipy.stats import qmc 

6from scipy.optimize import minimize 

7from scipy.stats import norm 

8import sys 

9from scipy.linalg import null_space 

10#from full_gddegp import gddegp 

11#from line_profiler import profile 

12from scipy.stats.qmc import Sobol 

13import scipy.stats as stats 

14from fractions import Fraction 

15from scipy.special import comb 

16from numpy.polynomial import Polynomial 

17import math 

18#import utils 

19from jetgp.full_degp import degp_utils 

20 

21 

def scale_samples(samples, lower_bounds, upper_bounds):
    """
    Scale each column of samples from the unit interval [0, 1] to user-defined bounds.

    Parameters
    ----------
    samples : ndarray of shape (n_samples, d)
        A 2D array of points in [0, 1]^d; each row is one sample, each column
        one input dimension.
    lower_bounds : array-like of length d
        Lower bound for each dimension (column).
    upper_bounds : array-like of length d
        Upper bound for each dimension (column).

    Returns
    -------
    ndarray of shape (n_samples, d)
        Samples with column j mapped affinely from [0, 1] to [lb_j, ub_j].

    Raises
    ------
    ValueError
        If the number of columns in `samples` does not match the length of the
        bound vectors.

    Example
    -------
    >>> samples = np.array([[0.5, 0.2], [0.8, 0.4]])
    >>> scale_samples(samples, [0, 1], [1, 3])
    array([[0.5, 1.4],
           [0.8, 1.8]])
    """
    samples = np.asarray(samples)
    lower_bounds = np.asarray(lower_bounds)
    upper_bounds = np.asarray(upper_bounds)

    # Explicit validation instead of `assert`: asserts are stripped under -O,
    # which would silently allow mis-shaped inputs through.
    if not (samples.shape[1] == lower_bounds.shape[0] == upper_bounds.shape[0]):
        raise ValueError("Dimension mismatch between samples and bounds")

    # Reshape bounds to (1, d) so they broadcast across the sample rows.
    lb = lower_bounds[np.newaxis, :]
    ub = upper_bounds[np.newaxis, :]

    return lb + samples * (ub - lb)

68 

69 

def flatten_der_indices(indices):
    """
    Flatten a nested list of derivative indices by one level.

    Parameters
    ----------
    indices : list of lists
        A nested list where each sublist contains derivative index
        specifications.

    Returns
    -------
    list
        A single flattened list containing all derivative index entries,
        in their original order.

    Example
    -------
    >>> flatten_der_indices([[[1, 1]], [[1, 2], [2, 1]]])
    [[1, 1], [1, 2], [2, 1]]
    """
    # One-level flatten; a comprehension replaces the manual index loops.
    return [entry for group in indices for entry in group]

96 

97 

def nrmse(y_true, y_pred, norm_type="minmax"):
    """
    Compute the Normalized Root Mean Squared Error (NRMSE).

    Parameters
    ----------
    y_true : array-like
        Ground truth or reference values.
    y_pred : array-like
        Predicted values to compare against the ground truth.
    norm_type : str, default="minmax"
        Normalization of the RMSE:
        - 'minmax': divide by the range (max - min) of `y_true`.
        - 'mean':   divide by the mean of `y_true`.
        - 'std':    divide by the standard deviation of `y_true`.

    Returns
    -------
    float
        The normalized root mean squared error. Returns ``np.inf`` when the
        chosen normalizer evaluates to zero.

    Raises
    ------
    ValueError
        If `norm_type` is not one of {'minmax', 'mean', 'std'}.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    rmse = np.sqrt(np.mean(np.square(y_true - y_pred)))

    # Dispatch table instead of an if/elif chain.
    normalizers = {
        "minmax": lambda v: np.max(v) - np.min(v),
        "mean": np.mean,
        "std": np.std,
    }
    try:
        denom = normalizers[norm_type](y_true)
    except KeyError:
        raise ValueError("norm_type must be 'minmax', 'mean', or 'std'") from None

    return rmse / denom if denom != 0 else np.inf

148 

149 

def convert_index_to_exponent_form(lst):
    """
    Convert a list of indices into exponent (run-length) form.

    Consecutive identical elements are compressed into pairs [value, count],
    where 'value' is the element and 'count' is the length of the run.

    Parameters
    ----------
    lst : list of int
        A list of integers representing variable indices.

    Returns
    -------
    list of [int, int]
        A compressed list where each entry is [value, count], representing the
        multiplicity of each consecutive run in the original list.

    Example
    -------
    >>> convert_index_to_exponent_form([1, 1, 2, 2, 2, 3])
    [[1, 2], [2, 3], [3, 1]]
    """
    # Local import so this fix does not touch the module import block.
    from itertools import groupby

    # groupby is the stdlib run-length-encoding idiom; it groups only
    # consecutive duplicates, matching the original hand-rolled loop.
    return [[value, sum(1 for _ in run)] for value, run in groupby(lst)]

192 

193 

def build_companion_array(nvars, order, der_indices):
    """
    Build a companion array mapping each derivative index to its order in the
    OTI basis.

    Entry 0 is always 0 (the function-value slot); each subsequent entry is
    the derivative order of the corresponding term, as resolved by
    `compare_OTI_indices`.

    Parameters
    ----------
    nvars : int
        Number of variables (input dimensions).
    order : int
        Maximum derivative order considered.
    der_indices : list of lists
        Derivative indices in exponent form, grouped by derivative order;
        each inner entry is one multi-index.

    Returns
    -------
    companion_array : ndarray
        1D array: [0, order_of_term_1, order_of_term_2, ...].

    Example
    -------
    >>> build_companion_array(2, 2, [[[1, 1]], [[1, 2]], [[2, 1]]])
    array([0, 1, 2, 1])
    """
    orders = [0]
    for group in der_indices:
        for term in group:
            orders.append(compare_OTI_indices(nvars, order, term))
    return np.array(orders)

235 

def build_companion_array_predict(nvars, order, der_indices):
    """
    Build a companion array for prediction, mapping each derivative index to
    its order in the OTI basis.

    Unlike `build_companion_array`, this variant expects a FLAT list of
    derivative multi-indices (no per-order grouping). Entry 0 is always 0
    (the function-value slot).

    Parameters
    ----------
    nvars : int
        Number of variables (input dimensions).
    order : int
        Maximum derivative order considered.
    der_indices : list of lists
        Flat list of derivative multi-indices in exponent form.

    Returns
    -------
    companion_array : ndarray
        1D array of length (1 + len(der_indices)): the first entry is 0, each
        subsequent entry is the derivative order of the matching term.

    Example
    -------
    >>> build_companion_array_predict(2, 2, [[[1, 1]], [[1, 2]], [[2, 1]]])
    array([0, 1, 2, 1])
    """
    companion_list = [0] + [
        compare_OTI_indices(nvars, order, term) for term in der_indices
    ]
    return np.array(companion_list)

276 

277 

def compare_OTI_indices(nvars, order, term_check):
    """
    Find the derivative order of a multi-index within the OTI basis.

    Scans all OTI basis directions of order 1..`order` and returns the order
    of the first direction whose exponent form equals `term_check`.

    Parameters
    ----------
    nvars : int
        Number of variables (input dimensions).
    order : int
        Maximum derivative order considered.
    term_check : list of [int, int]
        The multi-index to check, in exponent form [[var_index, exponent], ...].

    Returns
    -------
    int
        The order of the matching term (1..order), or -1 if no basis term
        matches.

    Example
    -------
    >>> compare_OTI_indices(2, 2, [[1, 2]])  # d^2/dx1^2
    2
    """
    helper = coti.get_dHelp()

    for current_order in range(1, order + 1):
        ndirections = coti.ndir_order(nvars, current_order)
        for direction in range(ndirections):
            candidate = convert_index_to_exponent_form(
                helper.get_fulldir(direction, current_order))
            if np.array_equal(candidate, term_check):
                return current_order
    return -1

317 

318 

def gen_OTI_indices(nvars, order):
    """
    Generate the OTI basis indices in exponent form, grouped by order.

    Parameters
    ----------
    nvars : int
        Number of variables (input dimensions).
    order : int
        Maximum derivative order considered.

    Returns
    -------
    list of lists
        Outer list of length `order`; entry k-1 holds the exponent-form
        multi-indices of all order-k basis directions.

    Example
    -------
    >>> gen_OTI_indices(2, 2)
    [
        [[[1, 1]], [[2, 1]]],                     # first-order terms
        [[[1, 2]], [[1, 1], [2, 1]], [[2, 2]]]    # second-order terms
    ]
    """
    helper = coti.get_dHelp()

    indices_by_order = []
    for current_order in range(1, order + 1):
        ndirections = coti.ndir_order(nvars, current_order)
        indices_by_order.append([
            convert_index_to_exponent_form(helper.get_fulldir(d, current_order))
            for d in range(ndirections)
        ])
    return indices_by_order

362 

363 

def reshape_y_train(y_train):
    """
    Flatten and concatenate function values and derivative observations into a
    single 1D array.

    Parameters
    ----------
    y_train : list of arrays
        A list where:
        - y_train[0] contains the function values.
        - y_train[1:], if present, contain derivative values for each
          derivative component.

    Returns
    -------
    ndarray of shape (n_total,)
        A flat 1D array: function values first, then each derivative block in
        order.

    Example
    -------
    >>> reshape_y_train([np.array([1.0, 2.0]), np.array([0.5, 1.0])])
    array([1. , 2. , 0.5, 1. ])
    """
    # Single concatenate instead of repeated vstack-in-a-loop (which copies
    # the accumulated array on every iteration).
    return np.concatenate([np.asarray(block).reshape(-1) for block in y_train])

391 

392 

def transform_cov(cov, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale the diagonal of a covariance matrix back to the original
    (unnormalized) scale for function values and multi-index derivatives.

    Parameters
    ----------
    cov : ndarray of shape (n_total, n_total)
        Covariance matrix from the GP model (function values + derivatives).
    sigma_y : float
        Standard deviation used to normalize the function values.
    sigmas_x : ndarray of shape (1, nvars)
        Standard deviations used to normalize each input dimension.
    der_indices : list of lists
        Derivative multi-indices in exponent form [[var_index, exponent], ...].
    X_test : ndarray of shape (n_samples, nvars)
        Test inputs; its row count fixes the block size of `cov`.

    Returns
    -------
    ndarray of shape (n_total,)
        Rescaled variances (absolute diagonal of `cov` times the appropriate
        scale factor per block).
    """
    n = X_test.shape[0]
    variances = abs(np.diag(cov))
    rescaled = np.zeros((variances.shape[0],))

    # Function-value block: variance scales with sigma_y^2.
    rescaled[:n] = variances[:n] * sigma_y ** 2

    # Derivative blocks: chain rule divides by the squared input scales.
    for block, term in enumerate(der_indices, start=1):
        input_scale = 1
        for var_idx, exponent in term:
            input_scale = input_scale * sigmas_x[0][var_idx - 1] ** exponent
        scale = sigma_y ** 2 / input_scale ** 2
        lo, hi = block * n, (block + 1) * n
        rescaled[lo:hi] = variances[lo:hi] * scale
    return rescaled

435 

436 

def transform_cov_directional(cov, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale the diagonal of a covariance matrix for function values and
    directional derivatives.

    Unlike `transform_cov`, derivatives here are directional, so every block
    (function values and derivatives alike) is scaled by sigma_y**2 only; no
    input scaling is applied.

    Parameters
    ----------
    cov : ndarray of shape (n_total, n_total)
        Covariance matrix from the GP model.
    sigma_y : float
        Standard deviation used to normalize the function values.
    sigmas_x : ndarray of shape (1, nvars)
        Input standard deviations (unused here; kept for interface symmetry
        with `transform_cov`).
    der_indices : list
        Directional-derivative entries; only its length (number of blocks)
        is used.
    X_test : ndarray of shape (n_samples, nvars)
        Test inputs; its row count fixes the block size of `cov`.

    Returns
    -------
    ndarray of shape (n_total,)
        Rescaled variances.
    """
    n = X_test.shape[0]
    variances = abs(np.diag(cov))
    rescaled = np.zeros((variances.shape[0],))

    scale = sigma_y ** 2  # same factor for every block (loop invariant)
    rescaled[:n] = variances[:n] * scale
    for block in range(1, len(der_indices) + 1):
        lo, hi = block * n, (block + 1) * n
        rescaled[lo:hi] = variances[lo:hi] * scale
    return rescaled

475 

476 

def transform_predictions(y_pred, mu_y, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale predicted function values and multi-index derivatives from
    normalized space back to the original scale.

    Parameters
    ----------
    y_pred : ndarray of shape (n_total,)
        Predicted means in normalized space (function values first, then one
        block per derivative component).
    mu_y : float or (1, 1) array
        Mean of the original function values.
    sigma_y : array
        Standard deviation of the original function values. NOTE(review):
        the code indexes `factor[0, 0]`, so this is expected to be a
        (1, 1)-shaped array rather than a plain float — confirm with callers.
    sigmas_x : ndarray of shape (1, nvars)
        Standard deviations of the input variables.
    der_indices : list of lists
        Derivative multi-indices in exponent form [[var_index, exponent], ...].
    X_test : ndarray of shape (n_samples, nvars)
        Test inputs; its row count fixes the block size of `y_pred`.

    Returns
    -------
    ndarray of shape (n_total, 1)
        Rescaled function values and derivatives, stacked as a column vector.
    """
    n = X_test.shape[0]
    rescaled = (mu_y + y_pred[:n] * sigma_y).reshape(-1, 1)

    for block, term in enumerate(der_indices, start=1):
        # Chain rule: divide the output scale by each input scale^exponent.
        scale = sigma_y
        for var_idx, exponent in term:
            scale = scale / sigmas_x[0][var_idx - 1] ** exponent
        segment = y_pred[block * n:(block + 1) * n] * scale[0, 0]
        rescaled = np.vstack((rescaled, segment.reshape(-1, 1)))
    return rescaled

521 

522 

def transform_predictions_directional(y_pred, mu_y, sigma_y, sigmas_x, der_indices, X_test):
    """
    Rescale predicted function values and directional derivatives from
    normalized space back to the original scale.

    Derivatives here are directional, so only the output scale (sigma_y) is
    applied to the derivative blocks; no input scaling is used.

    Parameters
    ----------
    y_pred : ndarray of shape (n_total,)
        Predicted means in normalized space (function values first, then one
        block per direction).
    mu_y : float or (1, 1) array
        Mean of the original function values.
    sigma_y : array
        Standard deviation of the original function values. NOTE(review):
        the code indexes `sigma_y[0, 0]`, so this is expected to be a
        (1, 1)-shaped array rather than a plain float — confirm with callers.
    sigmas_x : ndarray of shape (1, nvars)
        Input standard deviations (unused; kept for interface compatibility).
    der_indices : list
        Directional-derivative entries; only its length (number of blocks)
        is used.
    X_test : ndarray of shape (n_samples, nvars)
        Test inputs; its row count fixes the block size of `y_pred`.

    Returns
    -------
    ndarray of shape (n_total, 1)
        Rescaled function values and directional derivatives as a column
        vector.
    """
    n = X_test.shape[0]
    rescaled = (mu_y + y_pred[:n] * sigma_y).reshape(-1, 1)

    for block in range(1, len(der_indices) + 1):
        segment = y_pred[block * n:(block + 1) * n] * sigma_y[0, 0]
        rescaled = np.vstack((rescaled, segment.reshape(-1, 1)))
    return rescaled

565 

566 

def normalize_x_data_test(X_test, sigmas_x, mus_x):
    """
    Normalize test inputs with the training-set mean and standard deviation.

    Parameters
    ----------
    X_test : ndarray of shape (n_samples, nvars)
        Test input points to be normalized.
    sigmas_x : ndarray of shape (1, nvars)
        Per-variable standard deviations from the training inputs (scaling).
    mus_x : ndarray of shape (1, nvars)
        Per-variable means from the training inputs (centering).

    Returns
    -------
    ndarray of shape (n_samples, nvars)
        Normalized test inputs: (X_test - mus_x) / sigmas_x.

    Example
    -------
    >>> normalize_x_data_test(np.array([[2.0, 3.0]]),
    ...                       np.array([[1.0, 2.0]]),
    ...                       np.array([[0.0, 1.0]]))
    array([[2., 1.]])
    """
    return (X_test - mus_x) / sigmas_x

597 

598 

def normalize_x_data_train(X_train):
    """
    Normalize training inputs by centering and scaling each variable.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, nvars)
        Training input points.

    Returns
    -------
    X_train_normalized : ndarray of shape (n_samples, nvars)
        Normalized training inputs: (X_train - mean) / std, per column.

    Example
    -------
    >>> normalize_x_data_train(np.array([[1.0, 2.0], [3.0, 4.0]]))
    array([[-1., -1.],
           [ 1.,  1.]])

    NOTE(review): the previous docstring promised a 3-tuple
    (normalized, means, stds) but the function has always returned only the
    normalized array; the docstring now matches the code. If callers need the
    statistics, they can recompute them or a tuple-returning variant should
    be added separately.
    """
    mean_vec_x = np.mean(X_train, axis=0).reshape(1, -1)  # shape: (1, nvars)
    std_vec_x = np.std(X_train, axis=0).reshape(1, -1)    # shape: (1, nvars)

    return (X_train - mean_vec_x) / std_vec_x

629 

630 

def normalize_directions(sigmas_x, rays):
    """
    Normalize direction vectors (rays) into the normalized input space.

    Each component j of every direction vector is divided by the input
    standard deviation sigmas_x[0][j], so directional derivatives taken along
    the transformed rays are consistent with normalized inputs.

    Parameters
    ----------
    sigmas_x : ndarray of shape (1, nvars)
        Standard deviations of the input variables.
    rays : ndarray of shape (nvars, n_directions)
        Direction vectors (columns) in the original input space.

    Returns
    -------
    ndarray of shape (nvars, n_directions)
        Normalized direction vectors.

    Example
    -------
    >>> normalize_directions(np.array([[2.0, 1.0]]),
    ...                      np.array([[1.0, 0.0], [0.0, 1.0]]))
    array([[0.5, 0. ],
           [0. , 1. ]])
    """
    # Single broadcast divide replaces the per-column diag-matrix products:
    # dividing row j by sigmas_x[...,j] is exactly diag(1/sigmas) @ column.
    return np.asarray(rays, dtype=float) / sigmas_x.reshape(-1, 1)

662 

663 

664# def normalize_directions_2(sigmas_x, rays): 

665# """ 

666# Normalize direction vectors (rays) based on input scaling. 

667 

668# This function rescales direction vectors used for **directional derivatives** so that 

669# they are consistent with normalized input space. 

670 

671# Parameters: 

672# ---------- 

673# sigmas_x : ndarray of shape (1, nvars) 

674# Standard deviations of the input variables (used for scaling each direction). 

675# rays : ndarray of shape (nvars, n_directions) 

676# Direction vectors (columns) in the original input space. 

677 

678# Returns: 

679# ------- 

680# transformed_rays : ndarray of shape (nvars, n_directions) 

681# Normalized direction vectors. 

682 

683# Example: 

684# -------- 

685# >>> sigmas_x = np.array([[2.0, 1.0]]) 

686# >>> rays = np.array([[1.0, 0.0], [0.0, 1.0]]) 

687# >>> normalize_directions(sigmas_x, rays) 

688# array([[0.5, 0. ], 

689# [0. , 1. ]]) 

690# """ 

691# transformed_rays_list = [] 

692# for i in range(len(rays)): 

693 

694# transformed_rays = np.zeros(rays[i].shape) 

695# for j in range(transformed_rays.shape[1]): 

696# transformed_rays[:, j] = np.diag( 

697# 1/sigmas_x.flatten()) @ rays[i][:, j] 

698# transformed_rays_list.append(transformed_rays) 

699# return transformed_rays_list 

700 

701 

def normalize_directions_2(sigmas_x, rays_array):
    """
    Normalize a list of direction-vector arrays into normalized input space.

    Like `normalize_directions`, but accepts a list of ray arrays (e.g. one
    per training point) and normalizes each.

    Parameters
    ----------
    sigmas_x : ndarray of shape (1, nvars)
        Standard deviations of the input variables.
    rays_array : list of ndarray, each of shape (nvars, n_directions)
        Direction vectors (columns) in the original input space.

    Returns
    -------
    list of ndarray
        List of normalized direction arrays, same shapes as the inputs.

    Example
    -------
    >>> normalize_directions_2(np.array([[2.0, 1.0]]),
    ...                        [np.array([[1.0, 0.0], [0.0, 1.0]])])
    [array([[0.5, 0. ],
            [0. , 1. ]])]
    """
    scale = sigmas_x.reshape(-1, 1)  # (nvars, 1): broadcasts over columns
    return [rays / scale for rays in rays_array]

731 

732 

def normalize_y_data(X_train, y_train, sigma_data, der_indices):
    """
    Normalize function values, derivatives, and observational noise for
    training data.

    - Function values (`y_train[0]`) are normalized to zero mean, unit
      variance.
    - Derivative blocks (`y_train[1:]`) are scaled by the chain rule:
      (std_x[j] ** exponent_j) / std_y for each variable in the multi-index.
    - The matching blocks of `sigma_data` are scaled by the same factor.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, nvars)
        Training inputs (only used to compute normalization statistics).
    y_train : list of arrays
        y_train[0]: function values; y_train[1:]: one column array of
        derivative values per entry of `der_indices`.
    sigma_data : ndarray of shape (n_total,) or None
        Observational noise standard deviations, laid out in the same block
        order as the stacked targets. NOTE(review): despite the old docstring
        saying "float", the code slices it per block, so an array is expected.
    der_indices : list of lists
        Derivative multi-indices in exponent form [[var_index, exponent], ...].

    Returns
    -------
    y_train_normalized : ndarray of shape (n_total,)
        Normalized, flattened function values and derivatives.
    mean_vec_y, std_vec_y : ndarray
        Mean / standard deviation of the function values.
    std_vec_x, mean_vec_x : ndarray of shape (1, nvars)
        Standard deviations / means of the input variables.
    noise_std_normalized : ndarray or None
        Normalized noise standard deviations (a new array; the caller's
        `sigma_data` is left untouched), or None if `sigma_data` is None.
    """
    num_points = len(X_train)
    mean_vec_x = np.mean(X_train, axis=0).reshape(1, -1)  # (1, nvars)
    std_vec_x = np.std(X_train, axis=0).reshape(1, -1)    # (1, nvars)

    mean_vec_y = np.mean(y_train[0], axis=0).reshape(-1, 1)
    std_vec_y = np.std(y_train[0], axis=0).reshape(-1, 1)

    y_train_normalized = (y_train[0] - mean_vec_y) / std_vec_y

    # Bug fix: the old code aliased sigma_data and then assigned into it,
    # mutating the caller's array in place. Work on a copy instead.
    noise_std_normalized = (
        None if sigma_data is None else np.array(sigma_data, copy=True))

    for i in range(len(der_indices)):
        # Chain-rule factor for this derivative block.
        factor = 1 / std_vec_y
        for var_idx, exponent in der_indices[i]:
            factor = factor * std_vec_x[0][var_idx - 1] ** exponent
        y_train_normalized = np.vstack(
            (y_train_normalized.reshape(-1, 1), y_train[i + 1] * factor[0, 0]))
        if noise_std_normalized is not None:
            lo, hi = (i + 1) * num_points, (i + 2) * num_points
            noise_std_normalized[lo:hi] = noise_std_normalized[lo:hi] * factor[0, 0]

    return (y_train_normalized.flatten(), mean_vec_y, std_vec_y,
            std_vec_x, mean_vec_x, noise_std_normalized)

804 

805 

def normalize_y_data_directional(X_train, y_train, sigma_data, der_indices):
    """
    Normalize function values and directional derivatives for training data.

    - Function values (`y_train[0]`) are normalized to zero mean, unit
      variance.
    - Directional derivative blocks (`y_train[1:]`) are scaled by
      1 / std_vec_y only (directions carry the input scaling elsewhere).
    - `sigma_data`, if given, is scaled by 1 / std_vec_y.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, nvars)
        Training inputs (only used to compute normalization statistics).
    y_train : list of arrays
        y_train[0]: function values; y_train[1:]: one column array of
        directional derivative values per entry of `der_indices`.
    sigma_data : float or ndarray or None
        Observational noise standard deviation(s).
    der_indices : list
        Directional-derivative entries; only its length (number of blocks)
        is used.

    Returns
    -------
    y_train_normalized : ndarray of shape (n_total,)
        Normalized, flattened function values and directional derivatives.
    mean_vec_y, std_vec_y : ndarray
        Mean / standard deviation of the function values.
    std_vec_x, mean_vec_x : ndarray of shape (1, nvars)
        Standard deviations / means of the input variables.
    noise_std_normalized : same type as sigma_data, or None
        sigma_data / std_vec_y[0, 0], or None if `sigma_data` is None.
    """
    mean_vec_x = np.mean(X_train, axis=0).reshape(1, -1)  # (1, nvars)
    std_vec_x = np.std(X_train, axis=0).reshape(1, -1)    # (1, nvars)

    mean_vec_y = np.mean(y_train[0], axis=0).reshape(-1, 1)
    std_vec_y = np.std(y_train[0], axis=0).reshape(-1, 1)

    y_train_normalized = (y_train[0] - mean_vec_y) / std_vec_y

    scale = 1 / std_vec_y  # same factor for every directional block
    for i in range(len(der_indices)):
        y_train_normalized = np.vstack(
            (y_train_normalized.reshape(-1, 1), y_train[i + 1] * scale[0, 0]))

    # Bug fix: the noise normalization previously sat inside the loop body,
    # so with an empty `der_indices` the variable was never bound and the
    # return raised NameError. Computing it once here is equivalent for
    # non-empty der_indices and correct for the empty case.
    if sigma_data is not None:
        noise_std_normalized = sigma_data / std_vec_y[0, 0]
    else:
        noise_std_normalized = None

    return (y_train_normalized.flatten(), mean_vec_y, std_vec_y,
            std_vec_x, mean_vec_x, noise_std_normalized)

861 

def generate_submodel_noise_matricies(sigma_data, index, der_indices, num_points, base_der_indices):
    """
    Generate diagonal noise covariance matrices for each submodel component
    (function values plus their associated derivative blocks).

    For every submodel i, the diagonal starts with the first `num_points`
    diagonal entries of `sigma_data` (the function-value noise; note this is
    taken wholesale, not restricted by `index[i]`), followed by the diagonal
    entries selected by `index[i][j]` within each derivative block present in
    `der_indices[i]`.

    Parameters
    ----------
    sigma_data : ndarray of shape (n_total, n_total)
        Full (typically diagonal) noise matrix for all training data, laid
        out in blocks of `num_points`: function values first, then one block
        per entry of `base_der_indices`.
    index : list
        One entry per submodel; entry i is indexed as index[i][j], giving the
        point indices to keep for derivative j of that submodel.
    der_indices : list of lists
        Per-submodel derivative indices; each must appear in
        `base_der_indices`.
    num_points : int
        Number of training points per block (offset unit into `sigma_data`).
    base_der_indices : list of lists
        Master ordering of derivative blocks in `sigma_data`.

    Returns
    -------
    list of ndarray
        One diagonal noise matrix per submodel.

    Raises
    ------
    Exception
        If a derivative index in `der_indices` is not found in
        `base_der_indices`. (Bug fix: the old for/else used `continue`, which
        skipped the raise and silently dropped unknown indices, leaving the
        documented error path unreachable.)
    """
    sub_model_matricies = []
    for i, idx in enumerate(index):
        # Function-value noise: full first block's diagonal.
        values = np.diag(sigma_data[:num_points, :num_points])
        for j in range(len(der_indices[i])):
            try:
                # +1 because block 0 holds the function values.
                block = base_der_indices.index(der_indices[i][j]) + 1
            except ValueError:
                raise Exception(
                    'derivative index not found in base_der_indices')
            indices = block * num_points + np.array(idx[j])
            if len(indices) > 0:
                # Fancy indexing [indices, indices] picks diagonal entries.
                values = np.concatenate(
                    (values, sigma_data[indices, indices].flatten()))
        sub_model_matricies.append(np.diag(values))

    return sub_model_matricies

def generate_submodel_noise_matricies_old(sigma_data, index, der_indices, num_points, base_der_indices):
    """
    Legacy builder of per-submodel diagonal noise matrices.

    For each submodel, the diagonal of the leading function-value block of
    `sigma_data` is combined with the diagonal noise of every derivative block
    listed in `der_indices`, where the block for base_der_indices[k] starts at
    offset (k + 1) * num_points. Derivative directions that do not appear in
    `base_der_indices` are skipped.

    Parameters
    ----------
    sigma_data : ndarray of shape (n_total, n_total)
        Full (typically diagonal) noise covariance for all training data.
    index : list of lists of int
        Per-submodel point indices applied to every derivative block.
    der_indices : list of lists
        Per-submodel derivative directions, matched against base_der_indices.
    num_points : int
        Training points per function; sets the block offsets.
    base_der_indices : list of lists
        Master ordering of derivative blocks in the covariance matrix.

    Returns
    -------
    list of ndarray
        One diagonal noise matrix per submodel.
    """

    sub_model_matricies = []
    for model_pos, sub_idx in enumerate(index):
        # Start from the function-value noise diagonal.
        diag_vals = np.diag(sigma_data[:num_points, :num_points])
        for der in der_indices[model_pos]:
            match_pos = next(
                (k for k, cand in enumerate(base_der_indices) if cand == der),
                None)
            if match_pos is None:
                # Unknown derivative directions are silently skipped.
                continue
            offset_idx = ((match_pos + 1) * num_points) + np.array(sub_idx)
            diag_vals = np.concatenate(
                (diag_vals, sigma_data[offset_idx, offset_idx].flatten()))
        sub_model_matricies.append(np.diag(diag_vals))

    return sub_model_matricies

995 

996 

def matern_kernel_builder(nu, oti_module=None):
    """
    Build a callable Matérn kernel k(r) for a given smoothness ν.

    The kernel is assembled symbolically with SymPy and then lambdified, so
    half-integer ν values collapse `besselk` into closed-form exponential
    expressions.

    Parameters
    ----------
    nu : float
        Smoothness parameter of the Matérn kernel. Should be a half-integer
        (e.g., 0.5, 1.5, 2.5, ...).
    oti_module : module, optional
        The PyOTI static module to use for exp. If None, uses numpy.

    Returns
    -------
    callable
        A lambdified function evaluating the Matérn kernel at distance r.
    """
    r = sp.symbols('r')
    nu_rational = sp.Rational(2 * nu, 2)
    prefactor = (2 ** (1 - nu_rational)) / sp.gamma(nu_rational)
    scaled_r = sp.sqrt(2 * nu_rational) * r
    kernel_expr = sp.simplify(
        prefactor * scaled_r ** nu_rational
        * sp.simplify(sp.besselk(nu_rational, scaled_r))
    )

    if oti_module is None:
        backend = ["numpy"]
    else:
        # Route exp() through PyOTI so OTI numbers propagate derivatives.
        backend = [{"exp": oti_module.exp}, "numpy"]
    return sp.lambdify(r, kernel_expr, modules=backend)

1027 

1028 

def matern_kernel_grad_builder(nu, oti_module=None):
    """
    Build the derivative dk/dr of the Matérn kernel with given smoothness ν.

    Parameters
    ----------
    nu : float
        Smoothness parameter of the Matérn kernel (half-integer, e.g. 0.5,
        1.5, 2.5).
    oti_module : module, optional
        The PyOTI static module to use for exp. If None, uses numpy.

    Returns
    -------
    callable
        A lambdified function evaluating dk/dr at the scaled distance r.
    """
    r = sp.symbols('r')
    nu_rational = sp.Rational(2 * nu, 2)
    prefactor = (2 ** (1 - nu_rational)) / sp.gamma(nu_rational)
    scaled_r = sp.sqrt(2 * nu_rational) * r
    kernel_expr = (prefactor * scaled_r ** nu_rational
                   * sp.simplify(sp.besselk(nu_rational, scaled_r)))
    derivative_expr = sp.simplify(sp.diff(sp.simplify(kernel_expr), r))

    if oti_module is None:
        return sp.lambdify(r, derivative_expr, modules=["numpy"])
    # Route exp() through PyOTI so OTI numbers propagate derivatives.
    return sp.lambdify(r, derivative_expr,
                       modules=[{"exp": oti_module.exp}, "numpy"])

1058 

1059 

1060 

def generate_bernoulli_numbers(n_max: int) -> list[Fraction]:
    """
    Generate the Bernoulli numbers B_0 through B_{n_max} exactly.

    Uses the recurrence sum_{k=0}^{n} C(n+1, k) B_k = 0, solved for B_n.
    Odd orders above 1 are zero and emitted directly without evaluating the
    recurrence (equivalent result, fewer terms to sum).

    Args:
        n_max: The maximum order of the Bernoulli number to generate.

    Returns:
        A list of Bernoulli numbers as Fraction objects (empty for n_max < 0).
    """
    if n_max < 0:
        return []

    bernoulli = [Fraction(1)]  # B_0 = 1
    for n in range(1, n_max + 1):
        if n > 1 and n % 2 == 1:
            # B_n = 0 for every odd n > 1.
            bernoulli.append(Fraction(0))
            continue
        # B_n = -(1 / (n+1)) * sum_{k=0}^{n-1} C(n+1, k) B_k; C(n+1, n) = n+1.
        acc = sum((comb(n + 1, k, exact=True) * bernoulli[k] for k in range(n)),
                  Fraction(0))
        bernoulli.append(-acc / (n + 1))

    return bernoulli

1094 

def generate_bernoulli_polynomial(alpha: int) -> Polynomial:
    """
    Generate the (2*alpha)-th Bernoulli polynomial, B_{2*alpha}(x).

    The polynomial is B_n(x) = sum_{k=0}^{n} C(n, k) B_k x^(n-k) with
    n = 2*alpha, so the coefficient of x^j is C(n, n-j) * B_{n-j}.

    Args:
        alpha: A non-negative integer.

    Returns:
        A numpy.polynomial.Polynomial object representing the polynomial.

    Raises:
        ValueError: If alpha is not a non-negative integer.
    """
    if not isinstance(alpha, int) or alpha < 0:
        raise ValueError("alpha must be a non-negative integer.")

    degree = 2 * alpha

    # Exact Bernoulli numbers up to order 2*alpha.
    bernoulli_nums = generate_bernoulli_numbers(degree)

    # numpy.polynomial orders coefficients from x^0 upward.
    coeffs = np.zeros(degree + 1, dtype=object)
    for power in range(degree + 1):
        order = degree - power
        # Convert the exact Fraction to float for the Polynomial class.
        coeffs[power] = float(comb(degree, order, exact=True)
                              * bernoulli_nums[order])

    return Polynomial(coeffs)

1129 

def generate_bernoulli_lambda(alpha: int) -> callable:
    """
    Build a callable for the scaled (2*alpha)-th Bernoulli polynomial.

    Returns ((-1)^(alpha+1) * (2*pi)^(2*alpha) / (2*alpha)!) * B_{2*alpha}(x)
    as a numpy Polynomial, which is itself callable.

    Args:
        alpha: A non-negative integer.

    Returns:
        A callable evaluating the scaled B_{2*alpha}(x).
    """
    # Scalar * Polynomial yields another (callable) Polynomial.
    scale = ((-1) ** (alpha + 1) * (2 * np.pi) ** (2 * alpha)) \
        / math.factorial(2 * alpha)
    return scale * generate_bernoulli_polynomial(alpha)

1142 

1143 

def robust_local_optimization(func, x0, args=(), lb=None, ub=None, debug=False):
    """Run a bounded L-BFGS-B minimization, recovering from the
    'ABNORMAL_TERMINATION_IN_LNSRCH' failure mode.

    Parameters
    ----------
    func : callable
        Objective function to minimize.
    x0 : array-like
        Initial guess.
    args : tuple, optional
        Extra positional arguments forwarded to `func`.
    lb, ub : array-like or None, optional
        Lower/upper bounds; bounds are applied only when both are given.
    debug : bool, optional
        Print diagnostics when recovering from abnormal termination.

    Returns
    -------
    result : scipy OptimizeResult or RecoveredResult
        Always carries a boolean `recovered_from_abnormal` attribute so
        downstream checks (e.g. `should_accept_local_result`) can read it
        without risking AttributeError.
    """
    bounds = list(zip(lb, ub)) if lb is not None and ub is not None else None

    result = minimize(
        func, x0, args=args, method="L-BFGS-B", bounds=bounds,
        options={"disp": False, "maxiter": 1000, "gtol": 1e-7}
    )
    sys.stdout.flush()

    # Bug fix: tag every result with the flag, not only the abnormal path,
    # so a failed-but-not-abnormal result does not break attribute access.
    result.recovered_from_abnormal = False

    # `message` was bytes in older SciPy releases; normalize before matching.
    if "ABNORMAL_TERMINATION_IN_LNSRCH" not in str(result.message):
        return result

    if debug:
        print(f"L-BFGS-B terminated abnormally: {result.message}")
        print("Using current point as recovered solution...")

    # Lightweight stand-in for OptimizeResult after abnormal termination;
    # keeps the last iterate and re-evaluates the objective there.
    class RecoveredResult:
        def __init__(self, x, fun, message):
            self.x = x
            self.fun = fun
            self.success = False
            self.message = message
            self.recovered_from_abnormal = True

    current_f = func(result.x, *args)
    return RecoveredResult(
        result.x, current_f,
        f"Recovered from abnormal termination: {result.message}")

1173 

1174 

def should_accept_local_result(local_res, current_best_f, is_feasible, debug=False):
    """Decide whether a local-optimization result should replace the current best.

    Parameters
    ----------
    local_res : object
        Optimization result with `.success`, `.x`, `.fun` attributes and
        optionally `.recovered_from_abnormal`.
    current_best_f : float
        Incumbent best objective value; the result must strictly improve on it.
    is_feasible : callable
        Predicate returning True when a point satisfies the constraints.
    debug : bool, optional
        Print reasons for rejection.

    Returns
    -------
    bool
        True only for a successful (or recovered), feasible, strictly
        improving result.
    """
    # Bug fix: plain OptimizeResult objects (e.g. from a custom local
    # optimizer) may not carry the flag — default to False instead of raising.
    recovered = getattr(local_res, "recovered_from_abnormal", False)
    if not local_res.success and not recovered:
        return False

    if not is_feasible(local_res.x):
        if debug:
            print("Local optimization result is infeasible - rejecting")
        return False

    if local_res.fun >= current_best_f:
        if debug:
            print(
                "Local optimization did not improve:")
        return False

    return True

1192 

1193 

1194 

def jade(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={},
         pop_size=100, n_generations=100, p=0.1, c=0.1,
         minstep=1e-6, stagnation_limit=15, debug=False,
         local_opt_every=15, initial_positions=None, seed=42,
         local_optimizer=None, func_and_grad=None, grad_func=None):
    """
    JADE (Adaptive Differential Evolution) with optional local refinement
    and stagnation-based stopping criterion.

    JADE: Adaptive Differential Evolution With Optional External Archive
    https://ieeexplore.ieee.org/abstract/document/5208221

    Parameters
    ----------
    func : callable
        Objective function to minimize.
    lb, ub : array-like
        Lower and upper bounds.
    ieqcons : list
        List of inequality constraint functions (each must be >= 0 when
        feasible). NOTE(review): mutable default argument, but it is never
        mutated here, so it is safe in practice.
    f_ieqcons : callable or None
        Single function returning array of constraint values.
    args : tuple
        Extra arguments passed to func.
    kwargs : dict
        Extra keyword arguments passed to func.
    pop_size : int
        Population size.
    n_generations : int
        Maximum number of generations.
    p : float
        Fraction of top individuals for p-best selection.
    c : float
        Learning rate for parameter adaptation.
    minstep : float
        Minimum position change to accept a new best (when improving).
    stagnation_limit : int
        Stop if no improvement for this many consecutive generations.
    debug : bool
        Print debug information.
    local_opt_every : int or None
        Run local optimization every this many generations. None to disable.
    initial_positions : array-like or None
        Initial positions to seed the population.
    seed : int
        Random seed.
    local_optimizer : callable or None
        Custom local optimizer called as local_optimizer(func, x0, lb, ub).
    func_and_grad : callable or None
        Function returning (f, grad) used with jac=True in L-BFGS-B.
    grad_func : callable or None
        Gradient-only function; combined with func when func_and_grad is None.

    Returns
    -------
    g_best : ndarray
        Best position found.
    f_best : float
        Best objective value found.
    """

    lb = np.asarray(lb)
    ub = np.asarray(ub)
    D = len(lb)
    # NOTE(review): seeds the global NumPy RNG (legacy API) — affects other
    # users of np.random in the process.
    np.random.seed(seed)

    # Constraint setup: a single vector-valued constraint, a list of scalar
    # constraints, or an always-feasible dummy.
    if f_ieqcons is not None:
        def cons(x): return np.asarray(f_ieqcons(x, *args, **kwargs))
    elif ieqcons:
        # The comprehension variable `c` is scoped to the comprehension and
        # does not clobber the adaptation rate parameter `c`.
        def cons(x): return np.asarray([c(x, *args, **kwargs) for c in ieqcons])
    else:
        def cons(x): return np.array([0.0])

    def is_feasible(x):
        # Feasible means every constraint value is non-negative.
        return np.all(cons(x) >= 0)

    # Initialize population using Sobol (low-discrepancy space filling).
    sampler = qmc.Sobol(d=D, scramble=True, seed=seed)
    sobol_sample = sampler.random_base2(m=int(np.ceil(np.log2(pop_size))))
    pop = qmc.scale(sobol_sample[:pop_size], lb, ub)
    if initial_positions is not None:
        # Seed the front of the population with user-provided points.
        n_init = min(len(initial_positions), pop_size)
        pop[:n_init] = initial_positions[:n_init]

    # Fitness and feasibility
    fitness = np.array([func(ind, *args, **kwargs) for ind in pop])
    feasible = np.array([is_feasible(ind) for ind in pop])

    # Initialize global best: infeasible individuals are penalized with 1e10
    # so a feasible candidate wins whenever one exists.
    g_idx = np.argmin(fitness * feasible + (~feasible) * 1e10)
    g_best = pop[g_idx].copy()
    f_best = fitness[g_idx]

    # Adaptive parameter means (JADE's mu_F / mu_CR).
    mu_F = 0.5
    mu_CR = 0.5
    archive = []

    # Store previous best for minstep comparison
    prev_g_best = g_best.copy()

    # Stagnation counter
    stagnation_count = 0

    for gen in range(1, n_generations + 1):
        f_prev_gen = f_best

        new_pop = np.zeros_like(pop)
        new_fitness = np.zeros(pop_size)
        new_F_list = []
        new_CR_list = []

        for i in range(pop_size):
            # Adaptive F and CR: Cauchy-perturbed F, Gaussian-perturbed CR,
            # both clipped to [0, 1] per the JADE scheme.
            F = np.random.standard_cauchy() * 0.1 + mu_F
            F = np.clip(F, 0, 1)
            CR = np.random.normal(mu_CR, 0.1)
            CR = np.clip(CR, 0, 1)

            # p-best selection: random pick among the top ceil(p * pop_size).
            p_num = max(2, int(np.ceil(p * pop_size)))
            pbest_idx = np.random.choice(np.argsort(fitness)[:p_num])
            x_pbest = pop[pbest_idx]

            # Mutation: current-to-pbest
            idxs = [idx for idx in range(pop_size) if idx != i]
            r1, r2 = np.random.choice(idxs, 2, replace=False)
            x_mut = pop[i] + F * (x_pbest - pop[i]) + F * (pop[r1] - pop[r2])
            x_mut = np.clip(x_mut, lb, ub)

            # Crossover: binomial, with jrand forcing at least one mutant gene.
            jrand = np.random.randint(D)
            trial = np.array([
                x_mut[j] if np.random.rand() < CR or j == jrand
                else pop[i][j] for j in range(D)
            ])
            trial = np.clip(trial, lb, ub)

            f_trial = func(trial, *args, **kwargs)
            if is_feasible(trial) and f_trial < fitness[i]:
                # Successful trial: keep it, archive the replaced parent, and
                # record F/CR for parameter adaptation.
                new_pop[i] = trial
                new_fitness[i] = f_trial
                archive.append(pop[i].copy())
                new_F_list.append(F)
                new_CR_list.append(CR)
            else:
                new_pop[i] = pop[i]
                new_fitness[i] = fitness[i]

        # Update population
        pop = new_pop
        fitness = new_fitness

        # Update global best
        feasible_idx = [idx for idx in range(pop_size) if is_feasible(pop[idx])]
        if feasible_idx:
            idx_best = feasible_idx[np.argmin(fitness[feasible_idx])]
            if fitness[idx_best] < f_best:
                # Check minstep only when g_best improves
                step_size = np.linalg.norm(pop[idx_best] - prev_g_best)
                if step_size <= minstep:
                    if debug:
                        print(f"Stopping: Position change < {minstep} at generation {gen}")
                    g_best = pop[idx_best].copy()
                    f_best = fitness[idx_best]
                    break

                g_best = pop[idx_best].copy()
                f_best = fitness[idx_best]
                prev_g_best = g_best.copy()

        # Adapt F and CR toward the means of the successful parameters.
        if new_F_list:
            mu_F = (1 - c) * mu_F + c * np.mean(new_F_list)
            mu_CR = (1 - c) * mu_CR + c * np.mean(new_CR_list)

        # Periodic local refinement starting from the incumbent best.
        if local_opt_every is not None and gen % local_opt_every == 0:
            if local_optimizer is not None:
                res = local_optimizer(func, g_best, lb, ub)
            elif callable(func_and_grad):
                res = minimize(func_and_grad, g_best, args=args, method="L-BFGS-B",
                               jac=True, bounds=np.stack((lb, ub), axis=1),
                               options={"maxiter": 50})
            elif callable(grad_func):
                def _fg(x): return func(x), grad_func(x)
                res = minimize(_fg, g_best, args=args, method="L-BFGS-B",
                               jac=True, bounds=np.stack((lb, ub), axis=1),
                               options={"maxiter": 50})
            else:
                # No gradient available: let scipy pick a bounded method.
                res = minimize(func, g_best, args=args,
                               bounds=np.stack((lb, ub), axis=1),
                               options={"maxiter":50})
            # Accept the refined point only if feasible and strictly better.
            if is_feasible(res.x) and res.fun < f_best:
                g_best = res.x.copy()
                f_best = res.fun
                prev_g_best = g_best.copy()
            if debug:
                # NOTE(review): assumes `res` exposes nit/nfev — true for scipy
                # results; a custom local_optimizer must provide them too.
                print(f"Local refinement at gen {gen}: nit={res.nit}, nfev={res.nfev}, "
                      f"f={res.fun:.6e}, success={res.success}, message={res.message}")

        # Debug print
        if debug:
            print(f"Gen {gen}: best f={f_best}")

        # Check stagnation stopping criterion
        if f_best < f_prev_gen:
            stagnation_count = 0
        else:
            stagnation_count += 1

        if stagnation_count >= stagnation_limit:
            if debug:
                print(f"Stopping: No improvement for {stagnation_limit} generations at gen {gen}")
            break

    return g_best, f_best

1399 

def pso(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={},
        pop_size=100, omega=0.5, phip=0.5, phig=0.5, n_generations=100,
        minstep=1e-6, minfunc=1e-6, debug=False, seed=42,
        local_opt_every=15, initial_positions=None, local_optimizer=None,
        func_and_grad=None, grad_func=None):
    """
    Particle Swarm Optimization with periodic local refinement
    R. C. Eberhart, Y. Shi and J. Kennedy, Swarm Intelligence, CA, San Mateo:Morgan Kaufmann, 2001.
    https://theswissbay.ch/pdf/Gentoomen%20Library/Artificial%20Intelligence/Swarm%20Intelligence/Swarm%20intelligence%20-%20James%20Kennedy.pdf

    Parameters:
    -----------
    func : callable
        Objective function to minimize
    lb : array_like
        Lower bounds for variables
    ub : array_like
        Upper bounds for variables
    ieqcons : list, optional
        List of inequality constraint functions (>= 0 when feasible)
    f_ieqcons : callable, optional
        Function returning array of inequality constraints
    args : tuple, optional
        Extra arguments passed to objective function
    kwargs : dict, optional
        Extra keyword arguments passed to objective function
    pop_size : int, optional
        Number of particles in swarm
    omega : float, optional
        Inertia weight
    phip : float, optional
        Personal best weight
    phig : float, optional
        Global best weight
    n_generations : int, optional
        Maximum number of iterations
    minstep : float, optional
        Minimum step size for convergence
    minfunc : float, optional
        Minimum function improvement for convergence
    debug : bool, optional
        Whether to print debug information
    seed : int, optional
        Random seed for reproducibility
    local_opt_every : int, optional
        Frequency of local optimization (every N iterations); None disables
    initial_positions : array-like or None, optional
        Positions used to seed the front of the swarm
    local_optimizer : callable or None, optional
        Custom local optimizer called as local_optimizer(func, x0, lb, ub)
    func_and_grad : callable or None, optional
        Function returning (f, grad) for jac=True L-BFGS-B refinement
    grad_func : callable or None, optional
        Gradient-only function combined with func when func_and_grad is None

    Returns:
    --------
    best_position : ndarray
        Best position found (None if no feasible point was ever seen)
    best_value : float
        Best function value found
    """

    # Input validation
    lb = np.asarray(lb)
    ub = np.asarray(ub)
    D = len(lb)

    assert len(lb) == len(
        ub), "Lower- and upper-bounds must be the same length"
    assert np.all(
        ub > lb), "All upper-bound values must be greater than lower-bound values"
    assert callable(func), "Invalid function handle"

    # Set random seed (legacy global NumPy RNG).
    np.random.seed(seed)

    # Velocity bounds: one full box width per step at most.
    vhigh = np.abs(ub - lb)
    vlow = -vhigh

    # Constraint setup
    if f_ieqcons is not None:
        def cons(x):
            return np.asarray(f_ieqcons(x, *args, **kwargs))
    elif ieqcons:
        def cons(x):
            return np.asarray([c(x, *args, **kwargs) for c in ieqcons])
    else:
        def cons(x):
            return np.array([0.0])

    def is_feasible(x):
        # Feasible means every constraint value is non-negative.
        return np.all(cons(x) >= 0)

    # Initialize swarm using Sobol sequence
    sampler = qmc.Sobol(d=D, scramble=True, seed=seed)
    sobol_sample = sampler.random_base2(m=int(np.ceil(np.log2(pop_size))))
    x = qmc.scale(sobol_sample[:pop_size], lb, ub)
    if initial_positions is not None:
        n_init = min(len(initial_positions), pop_size)
        x[:n_init] = initial_positions[:n_init]
    # Initialize velocities and personal bests
    v = vlow + np.random.rand(pop_size, D) * (vhigh - vlow)
    p = np.copy(x)
    fp = np.array([func(p[i], *args, **kwargs) for i in range(pop_size)])
    feasibles = np.array([is_feasible(p[i]) for i in range(pop_size)])

    # Initialize global best
    fg = np.inf
    g = None

    for i in range(pop_size):
        if feasibles[i] and fp[i] < fg:
            fg = fp[i]
            g = p[i].copy()

    # NOTE(review): if no initial particle is feasible, `g` stays None and the
    # velocity update below (`g - x`) raises TypeError on the first iteration —
    # confirm whether callers guarantee a feasible start.

    # Main PSO loop
    it = 1
    while it <= n_generations:
        # Update velocities and positions (standard inertia + cognitive +
        # social terms), clipping positions back into the box.
        rp = np.random.rand(pop_size, D)
        rg = np.random.rand(pop_size, D)
        v = omega * v + phip * rp * (p - x) + phig * rg * (g - x)
        x = np.clip(x + v, lb, ub)

        # Evaluate particles and update personal/global bests
        for i in range(pop_size):
            fx = func(x[i], *args, **kwargs)

            if fx < fp[i] and is_feasible(x[i]):
                p[i] = x[i].copy()
                fp[i] = fx

                if fx < fg:
                    stepsize = np.linalg.norm(g - x[i])

                    if debug:
                        print(
                            f'New best for swarm at iteration {it}: {x[i]} {fx}')

                    # Check convergence criteria
                    if np.abs(fg - fx) <= minfunc:
                        print(f'Stopping: Objective improvement < {minfunc}')
                        return x[i], fx
                    if stepsize <= minstep:
                        print(f'Stopping: Position change < {minstep}')
                        return x[i], fx

                    g = x[i].copy()
                    fg = fx

        # Periodic local refinement
        if local_opt_every is None:
            pass
        elif it % local_opt_every == 0:
            if local_optimizer is not None:
                local_res = local_optimizer(func, g, lb, ub)
            elif callable(func_and_grad):
                bounds = list(zip(lb, ub))
                local_res = minimize(func_and_grad, g, args=args, method="L-BFGS-B",
                                     jac=True, bounds=bounds,
                                     options={"maxiter": 20})
                # Tag the result so should_accept_local_result can read the flag.
                if not hasattr(local_res, 'recovered_from_abnormal'):
                    local_res.recovered_from_abnormal = False
            elif callable(grad_func):
                bounds = list(zip(lb, ub))
                def _fg(x): return func(x), grad_func(x)
                local_res = minimize(_fg, g, args=args, method="L-BFGS-B",
                                     jac=True, bounds=bounds,
                                     options={"maxiter": 20})
                if not hasattr(local_res, 'recovered_from_abnormal'):
                    local_res.recovered_from_abnormal = False
            else:
                local_res = robust_local_optimization(
                    func, g, args=args, lb=lb, ub=ub, debug=False
                )

            if should_accept_local_result(local_res, fg, is_feasible, debug):
                if debug:
                    print(
                        "Gradient refinement improved")
                g = local_res.x.copy()
                fg = local_res.fun

        if debug:
            print(f'Best after iteration {it}: {g} {fg}')
        it += 1

    # Final checks
    print(f'Stopping: maximum iterations reached --> {n_generations}')
    if g is not None and not is_feasible(g):
        print("Warning: Optimization finished without a feasible solution.")

    return g, fg

1586 

1587 

def ecl_acquisition(mu_N, var_N, threshold=0.0):
    """
    Entropy Contour Learning (ECL) acquisition function.

    Computes the binary entropy of the probability that the latent function
    exceeds `threshold`, given a Gaussian posterior with mean `mu_N` and
    variance `var_N`:

        ECL(x) = -p log p - (1 - p) log(1 - p),
        p = P(Y(x) > T) = 1 - Phi((mu_N - T) / sigma_N)

    Points where the safe/fail classification is most uncertain (p ~ 0.5)
    receive the highest score.

    Parameters:
    -----------
    mu_N : float or array-like
        Posterior mean(s) at the candidate point(s).
    var_N : float or array-like
        Posterior variance(s) at the candidate point(s).
    threshold : float, default=0.0
        Threshold T for the affine limit state function g(Y(x)) = Y(x) - T.
        Default is 0.0, meaning failure region is defined by Y(x) <= 0.

    Returns:
    --------
    ecl_values : float or ndarray
        ECL acquisition values (higher values indicate more informative
        points); a scalar for a single point, an array otherwise.

    Examples:
    ---------
    >>> ecl_vals = ecl_acquisition(mu, var, threshold=0.0)
    >>> next_point_idx = np.argmax(ecl_vals)
    """
    # Generalization: accept Python scalars and lists as well as ndarrays
    # (the previous version required `.ndim` and failed on plain floats).
    mu_N = np.atleast_1d(np.asarray(mu_N, dtype=float))
    var_N = np.atleast_1d(np.asarray(var_N, dtype=float))

    # Guard against zero/negative variance before the sqrt and the division.
    sigma_N = np.sqrt(np.maximum(var_N, 1e-15))
    sigma_N = np.maximum(sigma_N, 1e-10)

    # Standardized values: (mu_N(x) - T) / sigma_N(x)
    z = (mu_N - threshold) / sigma_N

    # P(g(Y(x)) > 0) = P(Y(x) > T) = 1 - Phi(z)
    p_safe = 1 - norm.cdf(z)

    # Binary entropy H(p); clip probabilities for numerical stability.
    p_safe = np.clip(p_safe, 1e-15, 1 - 1e-15)
    ecl_values = -p_safe * np.log(p_safe) - (1 - p_safe) * np.log(1 - p_safe)

    # Return scalar if single point, array otherwise
    return ecl_values[0] if len(ecl_values) == 1 else ecl_values

1649 

1650 

def ecl_batch_acquisition(gp_model, X, rays_predict, params, threshold=0.0, batch_size=1):
    """
    Batch ECL acquisition function that selects multiple points simultaneously.

    Uses a greedy approach to select the next batch_size points that maximize
    the ECL criterion while maintaining diversity.

    Parameters:
    -----------
    gp_model : gddegp
        Trained gddegp model instance
    X : array-like, shape (n_candidates, n_features)
        Candidate points to select from
    rays_predict : ndarray
        Ray directions for prediction
    params : ndarray
        Hyperparameters for gddegp prediction
    threshold : float, default=0.0
        Threshold for the limit state function
    batch_size : int, default=1
        Number of points to select in the batch

    Returns:
    --------
    selected_indices : array-like, shape (batch_size,)
        Indices of selected points from X
    selected_points : array-like, shape (batch_size, n_features)
        The selected points

    Examples:
    ---------
    >>> indices, points = ecl_batch_acquisition(gp_model, candidates, rays, params, batch_size=3)
    >>> next_experiments = points
    """
    X = np.atleast_2d(X)
    n_candidates = X.shape[0]

    # Asking for at least as many points as exist: return everything.
    if batch_size >= n_candidates:
        return np.arange(n_candidates), X

    selected_indices = []
    remaining_indices = list(range(n_candidates))

    for _ in range(batch_size):
        if not remaining_indices:
            break

        # Evaluate ECL for remaining candidates
        remaining_X = X[remaining_indices]
        # FIXME(review): `ecl_acquisition` in this module takes
        # (mu_N, var_N, threshold) but is called here with five positional
        # arguments (gp_model, X, rays_predict, params, threshold). This raises
        # TypeError whenever this branch executes; the GP posterior mean and
        # variance must be predicted first and passed instead.
        ecl_values = ecl_acquisition(
            gp_model, remaining_X, rays_predict, params, threshold)

        # Select point with highest ECL value.
        # NOTE(review): the scores are recomputed identically each iteration
        # (no model update after a pick), so despite the docstring this greedy
        # loop reduces to top-k selection with no diversity term — confirm
        # whether that is intended.
        best_local_idx = np.argmax(ecl_values)
        best_global_idx = remaining_indices[best_local_idx]

        selected_indices.append(best_global_idx)
        remaining_indices.remove(best_global_idx)

    selected_indices = np.array(selected_indices)
    selected_points = X[selected_indices]

    return selected_indices, selected_points

1714 

1715 

def get_surrogate_gradient_ray(gp, x, params, fallback_axis=0, normalize=True, threshold=0.0):
    """
    Return a surrogate gradient direction (d x 1 column vector) at `x` using
    the GP model's predicted directional derivatives (any input dimension).

    The gradient is assembled component-by-component by querying the GP for
    the directional derivative along each standard basis vector. If the GP
    mean at `x` exceeds `threshold`, the direction is negated so it points
    back toward the threshold.

    Parameters
    ----------
    gp : object
        Trained GP model instance with a
        `.predict(x, ray, params, calc_cov, return_deriv)` method.
    x : array-like, shape (1, d)
        The input location where to compute the surrogate gradient direction.
    params : array-like
        GP hyperparameters.
    fallback_axis : int, default=0
        Axis of the unit vector returned when the gradient norm is ~0 and
        `normalize` is True.
    threshold : float, default=0.0
        Mean value above which the gradient sign is flipped.
    normalize : bool, default=True
        If True, return a unit vector; else, return the raw signed gradient.

    Returns
    -------
    ndarray, shape (d, 1)
        The chosen direction (unit vector when `normalize`, otherwise the
        signed gradient). Bug fix: the previous docstring incorrectly
        advertised a (ray, grad) tuple — a single array is returned.
    """
    x = np.atleast_2d(x)
    d = x.shape[1]
    grad = np.zeros((d, 1))

    # Query one directional derivative per coordinate axis.
    for i in range(d):
        basis = np.zeros((d, 1))
        basis[i, 0] = 1.0
        mu = gp.predict(x, basis, params, calc_cov=False, return_deriv=True)
        grad[i, 0] = mu[1]  # Assumes mu[0]=f(x), mu[1]=first derivative

    # Mean prediction at x (the direction argument is irrelevant when
    # return_deriv=False, so any axis works).
    basis0 = np.zeros((d, 1))
    basis0[0, 0] = 1.0
    mu_mean = gp.predict(x, basis0, params, calc_cov=False,
                         return_deriv=False)[0]

    # Flip sign if above threshold
    if mu_mean > threshold:
        grad = -grad

    # Bug fix: renamed from `norm` to avoid shadowing scipy.stats.norm
    # imported at module level.
    grad_norm = np.linalg.norm(grad)
    if not normalize:
        return grad.reshape(-1, 1)
    if grad_norm < 1e-16:
        # Degenerate gradient: fall back to a fixed coordinate direction.
        fallback = np.zeros((d, 1))
        fallback[fallback_axis, 0] = 1.0
        return fallback.reshape(-1, 1)
    return (grad / grad_norm).reshape(-1, 1)

1775 

1776 

def finite_difference_gradient(gp, x, params, h=1e-5):
    """
    Central finite-difference estimate of the GP posterior-mean gradient.

    Parameters
    ----------
    gp : object
        Trained GP model exposing ``predict``.
    x : ndarray, shape (1, d)
        Location at which the gradient is approximated.
    params : array-like
        GP hyperparameters forwarded to ``gp.predict``.
    h : float
        Finite-difference step size.

    Returns
    -------
    grad_fd : ndarray, shape (d, 1)
        Central finite difference gradient estimate.
    """
    point = np.atleast_2d(x)
    dim = point.shape[1]

    # The ray is irrelevant when return_deriv=False; any unit vector works.
    probe_ray = np.zeros((dim, 1))
    probe_ray[0, 0] = 1.0

    estimate = np.zeros((dim, 1))
    for axis in range(dim):
        step = np.zeros_like(point)
        step[0, axis] = h

        forward = gp.predict(point + step, probe_ray, params,
                             calc_cov=False, return_deriv=False)
        backward = gp.predict(point - step, probe_ray, params,
                              calc_cov=False, return_deriv=False)
        estimate[axis, 0] = (forward[0] - backward[0]) / (2 * h)
    return estimate

1815 

1816 

def check_gp_gradient(gp, x, params, h=1e-5, fallback_axis=0):
    """
    Sanity-check the GP-predicted gradient against a finite-difference one.

    Prints both gradients together with their absolute and relative errors,
    then returns the pair (surrogate gradient, finite-difference gradient),
    each of shape (d, 1).
    """
    surrogate = get_surrogate_gradient_ray(
        gp, x, params, fallback_axis=fallback_axis, normalize=False)
    numeric = finite_difference_gradient(gp, x, params, h=h)

    sur_flat = surrogate.flatten()
    num_flat = numeric.flatten()
    abs_err = np.abs(sur_flat - num_flat)

    print("Surrogate GP gradient:\n", sur_flat)
    print("Finite-difference gradient:\n", num_flat)
    print("Absolute error:\n", abs_err)
    print("Relative error:\n", abs_err / (np.abs(num_flat) + 1e-15))

    return surrogate, numeric

1835 

1836 

def get_entropy_ridge_direction_nd(gp, x, params, threshold=0.0, h=1e-5,
                                   fallback_axis=0, normalize=True, random_dir=False, seed=None):
    """
    Get a direction tangent to the entropy level set ("ridge direction") at x.

    The entropy (ECL acquisition) gradient is estimated by central finite
    differences, and a tangent direction is drawn from the null space of
    that gradient.

    Parameters
    ----------
    gp : object
        Trained GP model instance with .predict
    x : array-like, shape (1, d)
        The input location
    params : array-like
        GP hyperparameters
    threshold : float
        ECL threshold
    h : float
        Finite difference step
    fallback_axis : int
        Axis used when no tangent direction exists (e.g. d == 1)
    normalize : bool
        Normalize output direction
    random_dir : bool
        If True, return a random direction in the ridge (level set).
        If False, return the first tangent-basis vector (deterministic).
    seed : int or None
        For reproducible random direction

    Returns
    -------
    ridge_dir : ndarray, shape (d, 1)
        Ridge direction (tangent to the entropy level set) at x
    """
    # Absolute package import: this module is jetgp/utils.py, so a bare
    # ``from utils import ...`` only works when the CWD happens to be on
    # sys.path. Matches the ``jetgp.``-prefixed imports at the top of file.
    from jetgp.utils import ecl_acquisition

    x = np.atleast_2d(x)
    d = x.shape[1]

    def entropy_func(x0):
        # ECL entropy at x0. The ray is irrelevant for a mean/variance
        # prediction (return_deriv=False), so e_0 is used throughout.
        x0 = np.atleast_2d(x0)
        ray0 = np.zeros((d, 1))
        ray0[0, 0] = 1.0
        mu, var = gp.predict(
            x0, ray0, params, calc_cov=True, return_deriv=False)
        return float(ecl_acquisition(mu, var, threshold=threshold))

    # Central finite differences of the entropy w.r.t. each coordinate.
    grad_H = np.zeros((d, 1))
    for i in range(d):
        dx = np.zeros_like(x)
        dx[0, i] = h
        grad_H[i, 0] = (entropy_func(x + dx) - entropy_func(x - dx)) / (2 * h)

    # Orthonormal basis for the tangent space of the level set at x.
    # null_space returns shape (d, k): each column is a basis vector
    # (k == d - 1 for a non-zero gradient; k == d for a zero gradient).
    basis = null_space(grad_H.T)

    if basis.shape[1] == 0:
        # No tangent direction exists (e.g. d == 1 with a non-zero gradient).
        # Previously this raised IndexError on basis[:, 0]; fall back to a
        # coordinate axis instead.
        fallback = np.zeros((d, 1))
        fallback[fallback_axis, 0] = 1.0
        return fallback

    # Select a direction from the tangent space.
    if random_dir:
        rng = np.random.default_rng(seed)
        coeffs = rng.standard_normal(basis.shape[1])
        ridge_dir = basis @ coeffs
    else:
        # Deterministic choice: first basis vector.
        ridge_dir = basis[:, 0]

    if normalize:
        ridge_dir = ridge_dir / np.linalg.norm(ridge_dir)
    return ridge_dir.reshape(-1, 1)

1920 

1921 

def get_entropy_ridge_direction_nd_2(gp, x, params, threshold=0.0, h=1e-5,
                                     fallback_axis=0, normalize=True, random_dir=False, seed=None):
    """
    Get a direction tangent to the surrogate-gradient level set at x,
    choosing the tangent-basis vector along which the GP's second
    directional derivative is smallest in magnitude (i.e. the "flattest"
    ridge direction).

    Parameters
    ----------
    gp : object
        Trained GP model instance with .predict and .n_order
    x : array-like, shape (1, d)
        The input location
    params : array-like
        GP hyperparameters
    threshold : float
        Threshold forwarded to get_surrogate_gradient_ray for its sign flip
    h : float
        Finite difference step for the second-derivative fallback
    fallback_axis : int
        Axis used when no tangent direction exists (e.g. d == 1)
    normalize : bool
        Unused here; null-space basis vectors are already unit norm
    random_dir : bool
        Unused here; the flattest basis vector is chosen deterministically
    seed : int or None
        Unused here

    Returns
    -------
    best_dir : ndarray, shape (d, 1)
        Tangent direction with the smallest |second directional derivative|
    """
    # Forward the caller's settings. Previously threshold/fallback_axis were
    # hard-coded to 0.0/0, silently ignoring the function's own arguments.
    grad_H = get_surrogate_gradient_ray(
        gp, x, params, fallback_axis=fallback_axis, normalize=True,
        threshold=threshold)

    d = grad_H.shape[0]

    # Orthonormal basis for the null space (tangent space to the level set).
    # null_space returns a (d, k) matrix: each column is a basis vector.
    basis = null_space(grad_H.T)

    if basis.shape[1] == 0:
        # No tangent direction exists (e.g. d == 1). Previously this
        # returned None; fall back to a coordinate axis instead.
        fallback = np.zeros((d, 1))
        fallback[fallback_axis, 0] = 1.0
        return fallback

    best_grad = np.inf
    best_dir = None
    for idx in range(basis.shape[1]):
        v = basis[:, idx].reshape(-1, 1)
        if gp.n_order >= 2:
            # Second directional derivative available directly from the GP.
            mu = gp.predict(x, v, params, calc_cov=False, return_deriv=True)
            second_deriv = mu[2]
        else:
            # Central finite difference of the GP's first directional
            # derivative along v.
            x_fwd = x + h * v.T
            x_bwd = x - h * v.T
            mu_fwd = gp.predict(
                x_fwd, v, params, calc_cov=False, return_deriv=True)
            mu_bwd = gp.predict(
                x_bwd, v, params, calc_cov=False, return_deriv=True)
            # mu_fwd[1]/mu_bwd[1] are first directional derivatives at the
            # shifted positions.
            second_deriv = (mu_fwd[1] - mu_bwd[1]) / (2 * h)
        if abs(second_deriv) < best_grad:
            best_grad = abs(second_deriv)
            best_dir = v

    return best_dir

1995 

1996 

def sobol_points(n_points, box, seed=0):
    """
    Draw ``n_points`` scrambled Sobol samples inside an axis-aligned box.

    Parameters
    ----------
    n_points : int
        Number of samples to return.
    box : sequence of (lo, hi) pairs
        Per-dimension bounds; its length determines the dimension.
    seed : int
        Seed for the scrambled Sobol engine.

    Returns
    -------
    ndarray, shape (n_points, len(box))
        Samples scaled into the box.
    """
    dim = len(box)
    engine = qmc.Sobol(d=dim, scramble=True, seed=seed)
    # Sobol sequences balance best at power-of-two sizes: draw
    # 2**m >= n_points points and keep the first n_points rows.
    exponent = int(np.ceil(np.log2(n_points)))
    unit_pts = engine.random_base2(m=exponent)[:n_points]
    lows = np.array([lo for lo, _ in box])
    highs = np.array([hi for _, hi in box])
    return lows + (highs - lows) * unit_pts

2005 

2006 

def local_box_around_point(x_next, delta):
    """
    Build a per-dimension (lo, hi) box of half-width ``delta`` centered
    at ``x_next`` (accepts shape (1, d) or (d,)).
    """
    center = np.atleast_2d(x_next).reshape(-1)
    return [(float(c - delta), float(c + delta)) for c in center]

2012 

2013 

2014# @profile 

2015# def maximize_ier_direction( 

2016# gp, x_next, x_train, y_blocks, rays_array, params, box, threshold=0.0, n_integration=500, seed=123, delta=.5 

2017# ): 

2018# """ 

2019# Maximizes Integrated Entropy Reduction (IER) at x_next over directions v (unit norm). 

2020# """ 

2021# d = x_next.shape[1] 

2022# # 1. Integration points for Monte Carlo estimate 

2023# local_box = local_box_around_point(x_next, delta) 

2024# integration_points = sobol_points(n_integration, local_box, seed) 

2025# # Use a default ray for integration points (e.g., all along first axis) 

2026# ray0 = np.zeros((d, 1)) 

2027# ray0[0, 0] = 1.0 

2028# integration_rays = np.tile(ray0, n_integration) 

2029 

2030# # 2. Get current mean/variance at integration points 

2031# mu_before, var_before = gp.predict( 

2032# integration_points, integration_rays, params, calc_cov=True, return_deriv=False 

2033# ) 

2034# from utils import ecl_acquisition 

2035# ecl_before = ecl_acquisition(mu_before, var_before, threshold=threshold) 

2036 

2037# @profile 

2038# def negative_ier(w): 

2039# v = w.reshape(-1, 1) 

2040# norm_v = np.linalg.norm(v) 

2041# if norm_v < 1e-12: 

2042# return 1e6 

2043# v = v / norm_v 

2044 

2045# Y_blocks = [y.copy() for y in y_blocks] 

2046# # --- Normalize if needed --- 

2047# v_ = v 

2048# integration_rays_ = integration_rays 

2049# # utils.check_gp_gradient(gp, x_next, previous_params) 

2050# # Hypercomplex construction for new point 

2051 

2052# # Compute GP predictions (function value and derivatives) 

2053# y_pred = gp.predict(x_next, v_, params, 

2054# calc_cov=False, return_deriv=True).ravel() 

2055 

2056# # How many derivatives do you want to include? 

2057# n_order = gp.n_order 

2058 

2059# # Rebuild y_blocks_next to include the GP predictions 

2060# Y_blocks_next = [] 

2061 

2062# # Append the function prediction 

2063# Y_blocks_next.append(y_pred[0].reshape(-1, 1)) 

2064 

2065# # Append derivatives in order (1st, 2nd, ..., n_order) 

2066# for i in range(1, n_order + 1): 

2067# Y_blocks_next.append(y_pred[i].reshape(-1, 1)) 

2068 

2069# # Augment training set 

2070# X_train = np.vstack([x_train, x_next]) 

2071# for k in range(len(y_blocks)): 

2072# Y_blocks[k] = np.vstack([Y_blocks[k], Y_blocks_next[k]]) 

2073# rays_array_tmp = np.concatenate([rays_array, v_], axis=1) 

2074 

2075# gp_temp = gddegp.gddegp(X_train, Y_blocks, 

2076# n_order=gp.n_order, 

2077# rays_array=rays_array_tmp, 

2078# normalize=True, 

2079# kernel="SE", 

2080# kernel_type="anisotropic",) 

2081 

2082# mu, var_after = gp_temp.predict( 

2083# integration_points, integration_rays_, params, calc_cov=True, return_deriv=False) 

2084# ecl_after = ecl_acquisition( 

2085# mu, var_after, threshold=threshold) 

2086# ier = np.mean(ecl_before - ecl_after) 

2087# return -ier 

2088# # 4. Optimize using L-BFGS-B, with a couple random restarts for robustness 

2089 

2090# D = d # dimension of direction 

2091# lb = np.full(D, -2.0) 

2092# ub = np.full(D, 2.0) 

2093# # Optionally, can try [-1, 1] or any sufficiently wide box (PSO will normalize inside objective) 

2094 

2095# best_w, best_val = pso( 

2096# negative_ier, lb, ub, 

2097# pop_size=10*d, maxiter=11, debug=True, seed=seed + 77 

2098# ) 

2099# v_opt = best_w.reshape(-1, 1) 

2100# v_opt /= np.linalg.norm(v_opt) 

2101 

2102# return v_opt 

2103 

2104 

2105# utils.py 

2106 

2107 

2108# UPDATED function to use the new bounds logic 

2109# def find_next_point( 

2110# gp, 

2111# params, 

2112# x_train, 

2113# y_train_list, 

2114# dist_params, 

2115# acquisition_func, 

2116# integration_points = None, 

2117# n_candidate_points: int = 1024, 

2118# n_local_starts: int = 10, 

2119# use_agg_al = False, 

2120# var_hist = [], 

2121# **acq_kwargs 

2122# ): 

2123# """Finds the next point using distribution-aware optimization bounds.""" 

2124 

2125# if not use_agg_al: 

2126# # --- Stage 1: Coarse Global Search --- 

2127# shape, scale = get_pdf_params(dist_params) 

2128 

2129# sampler = Sobol(d=x_train.shape[1], scramble=True) 

2130# coarse_points_unit = sampler.random(n=n_candidate_points) 

2131# candidate_points = get_inverse(dist_params, coarse_points_unit 

2132# ) 

2133 

2134# candidate_scores = acquisition_func( 

2135# gp, params, x_train, y_train_list, 

2136# candidate_points, integration_points = integration_points, normalize=gp.normalize 

2137# ) 

2138 

2139# top_indices = np.argsort(candidate_scores.ravel())[-n_local_starts:] 

2140# top_starting_points = candidate_points[top_indices] 

2141 

2142# # --- Stage 2: Local Optimization --- 

2143 

2144# # Get the correct bounds for the optimizer 

2145# bounds = get_optimization_bounds(dist_params) 

2146# all_unbounded = all(b[0] is None for b in bounds) 

2147 

2148# def objective_function(x: np.ndarray) -> float: 

2149# x_candidate = x.reshape(1, -1) 

2150# score = acquisition_func( 

2151# gp, params, x_train, y_train_list, 

2152# x_candidate, integration_points = integration_points, normalize=gp.normalize 

2153# ) 

2154# return -score[0] 

2155 

2156# best_x = None 

2157# best_score = -np.inf 

2158 

2159# for start_point in top_starting_points: 

2160# if all_unbounded: 

2161# res = minimize(fun=objective_function, x0=start_point, method="BFGS") 

2162# else: 

2163# res = minimize(fun=objective_function, x0=start_point, method="L-BFGS-B", bounds=bounds) 

2164 

2165# if -res.fun > best_score: 

2166# best_score = -res.fun 

2167# best_x = res.x 

2168 

2169# return best_x.reshape(1, -1), best_score 

2170# else: 

2171# agg_variance = acquisition_func(var_hist) 

2172# max_index = np.argmax(agg_variance) 

2173# return integration_points[max_index,:], agg_variance[0,max_index] 

2174 

2175def _ensure_2d(x): 

2176 x = np.asarray(x) 

2177 if x.ndim == 1: 

2178 return x.reshape(1, -1) 

2179 return x 

2180 

2181def _format_y_to_add(y_pred): 

2182 """Convert gp.predict output at one x to list of 2D arrays for y_train_list append.""" 

2183 if isinstance(y_pred, (list, tuple)): 

2184 return [np.atleast_2d(np.asarray(e)) for e in y_pred] 

2185 else: 

2186 return [np.atleast_2d(np.asarray(y_pred))] 

2187 

2188 

2189# def sample_from_dist(dist, means, var, shape, scale, size): 

2190# num_cols = len(dist) 

2191# num_rows = int(size) 

2192# samples = np.zeros((num_rows, num_cols)) 

2193# for i in range(0, num_cols): 

2194# if dist[i] == 'N': # normal (Gaussian) 

2195# samples[:, i] = stats.norm.rvs( 

2196# loc=shape[i], scale=scale[i], size=size) 

2197# elif dist[i] == 'U': # uniform 

2198# samples[:, i] = stats.uniform.rvs( 

2199# loc=shape[i], scale=scale[i], size=size) 

2200# elif dist[i] == 'LN': # log-normal 

2201# samples[:, i] = stats.lognorm.rvs( 

2202# scale[i], scale=np.exp(shape[i]), size=size) 

2203# # elif dist[i] == 'B': # beta 

2204# # frozen_dist = stats.beta(a=shape[i], b=scale[i]) 

2205# # transformed_samples[:, i] = frozen_dist.ppf(samples[:, i]) 

2206# elif dist[i] == 'T': # triangle (symmetric) 

2207# c = .5 

2208# samples[:, i] = stats.triang.rvs( 

2209# c, loc=shape[i], scale=scale[i], size=size) 

2210# elif dist[i] == 'UTN': 

2211# # samples[:, i] = stats.uniform.rvs( 

2212# # loc=shape[i], scale=scale[i] - shape[i], size=size) 

2213# cf = 3 

2214# # lb = shape[i] + cf*shape[i] 

2215# # ub = scale[i] - cf*shape[i] 

2216# # transformed_samples[:, i] = ( 

2217# # ub - lb)*samples[:, i] + lb 

2218# a, b = (shape[i] - means[i]) / \ 

2219# (cf*var[i]*means[i]), (scale[i] - 

2220# means[i]) / (cf*var[i]*means[i]) 

2221# samples[:, i] = stats.truncnorm.rvs( 

2222# a, b, loc=means[i], scale=cf*var[i]*means[i], size=size) 

2223 

2224# return samples 

def get_pdf_params(dist_params):
    """
    Compute scipy-style loc/scale parameters for each marginal distribution.

    Explicit 'lower_bounds'/'upper_bounds' entries take precedence for the
    bounded families ('U', 'TN'); otherwise loc/scale are derived from the
    'means' and 'vars' entries.

    Returns
    -------
    (ndarray, ndarray)
        Per-dimension loc and scale arrays.
    """
    dists = dist_params['dists']
    n_var = len(dists)
    locs = np.zeros(n_var)
    scales = np.zeros(n_var)

    explicit = 'lower_bounds' in dist_params and 'upper_bounds' in dist_params

    for i, name in enumerate(dists):
        if explicit and name in ('U', 'TN'):
            # Explicit bounds win for the bounded families.
            lb = dist_params['lower_bounds'][i]
            ub = dist_params['upper_bounds'][i]
            if name == 'U':
                locs[i] = lb            # loc = lower bound
                scales[i] = ub - lb     # scale = width
            else:  # 'TN'
                locs[i] = dist_params['means'][i]
                scales[i] = np.sqrt(dist_params['vars'][i])
        else:
            # Moment-based fallback when no bounds were supplied.
            stdev = np.sqrt(dist_params['vars'][i])
            mean = dist_params['means'][i]
            if name == 'U':
                locs[i] = mean - np.sqrt(3) * stdev
                scales[i] = 2 * np.sqrt(3) * stdev
            elif name == 'N':
                locs[i] = mean
                scales[i] = stdev
            # Other families currently fall through with loc = scale = 0.

    return locs, scales

2262 

def get_optimization_bounds(dist_params):
    """
    Build per-dimension (lower, upper) optimization bounds.

    Explicit 'lower_bounds'/'upper_bounds' win for the bounded families
    ('U', 'TN', 'B'); otherwise bounds are derived from moments
    ('U': mean +/- sqrt(3)*std, 'TN': mean +/- 3*std). Unbounded families
    ('N', 'LN') map to (None, None).
    """
    explicit = 'lower_bounds' in dist_params and 'upper_bounds' in dist_params
    out = []

    for i, name in enumerate(dist_params['dists']):
        if explicit and name in ('U', 'TN', 'B'):
            # Explicit bounds take priority.
            out.append((dist_params['lower_bounds'][i],
                        dist_params['upper_bounds'][i]))
        elif name == 'B':
            # Beta lives on [0, 1] by default.
            out.append((0, 1))
        elif name in ('N', 'LN'):
            # Unbounded support.
            out.append((None, None))
        else:
            # Moment-based fallback for U/TN without explicit bounds.
            stdev = np.sqrt(dist_params['vars'][i])
            mean = dist_params['means'][i]
            if name == 'U':
                out.append((mean - np.sqrt(3) * stdev,
                            mean + np.sqrt(3) * stdev))
            elif name == 'TN':
                out.append((mean - 3 * stdev, mean + 3 * stdev))
            else:
                out.append((None, None))
    return out

2294 

def get_inverse(dist_params, samples):
    """
    Transform uniform [0, 1] samples to the target marginals via inverse CDF.

    Parameters
    ----------
    dist_params : dict
        Must contain 'dists'; 'means'/'vars' and (optionally)
        'lower_bounds'/'upper_bounds' as required by the marginals.
    samples : ndarray, shape (n, d)
        Uniform samples in the unit hypercube, one column per dimension.

    Returns
    -------
    ndarray, shape (n, d)
        Transformed samples.

    Raises
    ------
    ValueError
        If a distribution code other than 'U', 'N', or 'TN' is encountered.
    """
    dists = dist_params['dists']
    loc_params, scale_params = get_pdf_params(dist_params)
    has_bounds = 'lower_bounds' in dist_params and 'upper_bounds' in dist_params

    transformed_samples = np.zeros(samples.shape)
    for i in range(samples.shape[1]):
        dist_name = dists[i]
        loc = loc_params[i]
        scale = scale_params[i]

        if dist_name == 'TN':
            mean = dist_params['means'][i]
            stdev = np.sqrt(dist_params['vars'][i])
            if has_bounds:
                lb = dist_params['lower_bounds'][i]
                ub = dist_params['upper_bounds'][i]
            else:  # Fallback: truncate at +/- 3 standard deviations
                lb = mean - 3 * stdev
                ub = mean + 3 * stdev

            # truncnorm takes standardized truncation limits.
            a, b = (lb - mean) / stdev, (ub - mean) / stdev
            frozen_dist = stats.truncnorm(a, b, loc=mean, scale=stdev)

        elif dist_name == 'U':
            frozen_dist = stats.uniform(loc=loc, scale=scale)

        elif dist_name == 'N':
            frozen_dist = stats.norm(loc=loc, scale=scale)

        else:
            # Previously an unknown code silently reused the previous
            # column's frozen distribution (or raised NameError on the
            # first column); fail loudly instead.
            raise ValueError(f"Unsupported distribution code: {dist_name!r}")

        transformed_samples[:, i] = frozen_dist.ppf(samples[:, i])
    return transformed_samples

2333 

2334# (The find_next_point function does not need to be changed from the previous version)