Coverage for jetgp/wdegp/wdegp.py: 86%
398 statements
« prev ^ index » next coverage.py v7.10.7, created at 2026-04-03 15:09 -0500
« prev ^ index » next coverage.py v7.10.7, created at 2026-04-03 15:09 -0500
1"""
2Unified Weighted Derivative-Enhanced Gaussian Process (WDEGP)
3==============================================================
5Supports DEGP, DDEGP, or GDDEGP mode for all submodels.
7Submodel Types:
8- 'degp': Coordinate-aligned derivatives (standard DEGP)
9- 'ddegp': Global directional derivatives (same rays at all points)
10- 'gddegp': Point-wise directional derivatives (unique rays per point)
11"""
13import numpy as np
14from numpy.linalg import cholesky
15from scipy.linalg import cho_solve, cho_factor, solve_triangular
16import jetgp.utils as utils
17from jetgp.kernel_funcs.kernel_funcs import KernelFactory, get_oti_module
20class wdegp:
21 """
22 Unified Weighted Derivative-Enhanced Gaussian Process (WDEGP) regression model.
24 Supports multiple submodels with DEGP, DDEGP, or GDDEGP derivative structure.
25 All submodels use the same derivative type.
27 Parameters
28 ----------
29 x_train : ndarray of shape (n_samples, n_features)
30 Input training points.
31 y_train : list of lists of arrays
32 Each element is a submodel's data: [y_func, y_der1, y_der2, ...]
33 n_order : int
34 Maximum derivative order to be supported.
35 n_bases : int
36 Number of OTI basis terms used.
37 der_indices : list of lists
38 Multi-indices of derivatives for each submodel.
39 derivative_locations : list of lists of lists, optional
40 For each submodel, which points have which derivatives.
41 derivative_locations[submodel][deriv_type] = [point_indices]
42 If None, all points have all derivatives.
43 submodel_type : str, default='degp'
44 Type of derivative structure: 'degp', 'ddegp', or 'gddegp'.
45 rays : ndarray, optional
46 For 'ddegp' mode: global ray directions, shape (d, n_directions).
47 All submodels share these rays.
48 rays_list : list of list of ndarray, optional
49 For 'gddegp' mode: point-wise rays organized by submodel.
50 rays_list[submodel_idx][dir_idx] has shape (d, n_points_with_dir).
51 Example: rays_list[0] = [rays_dir1_sm1, rays_dir2_sm1] for submodel 1.
52 normalize : bool, default=True
53 If True, normalizes the input and output data.
54 sigma_data : float or ndarray, optional
55 Known observation noise or covariance matrix.
56 kernel : str, default='SE'
57 Type of kernel to use: 'SE', 'RQ', 'Matern', or 'SineExp'.
58 kernel_type : str, default='anisotropic'
59 Whether kernel is 'anisotropic' or 'isotropic'.
60 smoothness_parameter : float, optional
61 Smoothness parameter for Matern kernel.
62 """
    def __init__(
        self,
        x_train,
        y_train,
        n_order,
        n_bases,
        der_indices,
        derivative_locations=None,
        submodel_type='degp',
        rays=None,
        rays_list=None,
        normalize=True,
        sigma_data=None,
        kernel="SE",
        kernel_type="anisotropic",
        smoothness_parameter=None
    ):
        """Build the WDEGP model: store inputs, validate the configuration,
        normalize data, precompute kernel differences, and attach the
        kernel and optimizer. See the class docstring for parameter details.
        """
        # Store basic parameters
        self.x_train = x_train
        self.y_train = y_train
        self.n_order = n_order
        self.n_bases = n_bases
        self.der_indices = der_indices
        self.derivative_locations = derivative_locations
        self.submodel_type = submodel_type
        self.rays = rays
        self.rays_list = rays_list
        self.normalize = normalize
        self.kernel = kernel
        self.kernel_type = kernel_type

        # GDDEGP requests twice the OTI basis count of DEGP/DDEGP.
        # NOTE(review): an invalid submodel_type leaves self.oti unset here;
        # it is rejected a few lines below by _validate_config().
        if submodel_type == 'degp' or submodel_type == 'ddegp':
            self.oti = get_oti_module(self.n_bases, n_order)
        elif submodel_type == 'gddegp':
            self.oti = get_oti_module(2*self.n_bases, n_order)

        self.num_points = len(x_train)
        self.dim = x_train.shape[1]
        self.num_submodels = len(y_train)

        # Store original input for reference (pre-normalization snapshots).
        self.y_train_input = [yt.copy() if hasattr(yt, 'copy') else yt for yt in y_train]
        self.x_train_input = x_train.copy()

        # Validate configuration
        self._validate_config()

        # Set up derivative_locations defaults
        self._setup_derivative_locations()

        # Process derivative indices
        self._setup_derivative_indices()

        # Handle sigma_data.
        # NOTE(review): np.diag() is applied unconditionally, so a scalar
        # sigma_data (allowed by the class docstring) would raise, and a full
        # covariance matrix would be reduced to its diagonal — confirm the
        # intended accepted shapes (1-D noise vector appears to be the one
        # that works as documented).
        if sigma_data is None:
            sigma_data = np.zeros(self._compute_total_constraints())
        self.sigma_data = np.diag(sigma_data)

        # Normalize if requested
        if normalize:
            self._normalize_data()
        else:
            self.y_train_normalized = [utils.reshape_y_train(submodel) for submodel in y_train]
            self.x_train_normalized = x_train

        # Precompute train-train differences (depends on normalized x).
        self._precompute_differences()

        # Set up kernel
        self.kernel_factory = KernelFactory(
            dim=self.dim,
            normalize=self.normalize,
            n_order=self.n_order,
            differences_by_dim=self.differences_by_dim,
            smoothness_parameter=smoothness_parameter,
            oti_module=self.oti
        )
        self.kernel_func = self.kernel_factory.create_kernel(
            kernel_name=self.kernel,
            kernel_type=self.kernel_type,
        )
        self.bounds = self.kernel_factory.bounds

        # Set up optimizer (local import — presumably to avoid a circular
        # import at module load time; confirm).
        from jetgp.wdegp.optimizer import Optimizer
        self.optimizer = Optimizer(self)
151 def _validate_config(self):
152 """Validate the configuration parameters."""
153 valid_types = ['degp', 'ddegp', 'gddegp']
154 if self.submodel_type not in valid_types:
155 raise ValueError(f"submodel_type must be one of {valid_types}, got '{self.submodel_type}'")
157 if self.submodel_type == 'ddegp':
158 if self.rays is None:
159 raise ValueError("rays parameter is required for submodel_type='ddegp'")
160 if self.rays.shape[0] != self.dim:
161 raise ValueError(f"rays must have shape (d, n_directions), got {self.rays.shape}")
163 if self.submodel_type == 'gddegp':
164 if self.rays_list is None:
165 raise ValueError("rays_list parameter is required for submodel_type='gddegp'")
166 if len(self.rays_list) != self.num_submodels:
167 raise ValueError(
168 f"rays_list must have {self.num_submodels} entries (one per submodel), "
169 f"got {len(self.rays_list)}"
170 )
171 for sm_idx, sm_rays in enumerate(self.rays_list):
172 for dir_idx, r in enumerate(sm_rays):
173 if r.shape[0] != self.dim:
174 raise ValueError(
175 f"rays_list[{sm_idx}][{dir_idx}] must have shape (d, n_points), "
176 f"got {r.shape}"
177 )
179 def _setup_derivative_locations(self):
180 """Set up derivative_locations with defaults."""
181 if self.derivative_locations is None:
182 # Default: all points have all derivatives for all submodels
183 self.derivative_locations = []
184 for submodel_idx in range(self.num_submodels):
185 n_derivs = self._count_derivatives_for_submodel(submodel_idx)
186 self.derivative_locations.append(
187 [list(range(self.num_points))] * n_derivs
188 )
190 # For GDDEGP, validate rays_list matches derivative_locations per submodel
191 if self.submodel_type == 'gddegp':
192 for sm_idx, sm_rays in enumerate(self.rays_list):
193 submodel_locs = self.derivative_locations[sm_idx]
194 for dir_idx, r in enumerate(sm_rays):
195 if dir_idx < len(submodel_locs):
196 expected_size = len(submodel_locs[dir_idx])
197 if r.shape[1] != expected_size:
198 raise ValueError(
199 f"rays_list[{sm_idx}][{dir_idx}] has {r.shape[1]} columns but "
200 f"derivative_locations[{sm_idx}][{dir_idx}] expects {expected_size} points"
201 )
203 def _count_derivatives_for_submodel(self, submodel_idx):
204 """Count the number of derivative types for a submodel."""
205 # Flatten der_indices for this submodel
206 der_idx = self.der_indices[submodel_idx]
207 count = 0
208 for group in der_idx:
209 count += len(group)
210 return count
212 def _setup_derivative_indices(self):
213 """Process and flatten derivative indices for each submodel."""
214 self.flattened_der_indices = []
215 self.powers = []
217 base_der_indices = utils.gen_OTI_indices(self.n_bases, self.n_order)
219 for submodel_idx, ders in enumerate(self.der_indices):
220 self.powers.append(
221 utils.build_companion_array(self.n_bases, self.n_order, ders)
222 )
223 flat_indices = [i for sublist in ders for i in sublist]
224 self.flattened_der_indices.append(flat_indices)
226 def _compute_total_constraints(self):
227 """Compute total number of constraints across all submodels."""
228 total = 0
229 for submodel_idx in range(self.num_submodels):
230 # Function values
231 total += self.num_points
232 # Derivative values
233 for locs in self.derivative_locations[submodel_idx]:
234 total += len(locs)
235 return total
    def _normalize_data(self):
        """Normalize training outputs and inputs (and rays for directional modes).

        Populates ``y_train_normalized`` plus the normalization statistics
        ``mu_y``, ``sigma_y``, ``sigmas_x``, ``mus_x``, rescales
        ``rays`` / ``rays_list`` into normalized input coordinates, and sets
        ``x_train_normalized``.

        NOTE(review): mu_y / sigma_y / sigmas_x / mus_x are overwritten on
        every loop iteration, so only the LAST submodel's statistics are
        retained — confirm this is intentional for multi-submodel data.
        """
        self.y_train_normalized = []

        if self.submodel_type == 'degp':
            # Standard DEGP normalization
            for k, submodel_data in enumerate(self.y_train):
                y_norm, self.mu_y, self.sigma_y, self.sigmas_x, self.mus_x, _ = \
                    utils.normalize_y_data(
                        self.x_train,
                        submodel_data,
                        np.zeros(len(submodel_data)), # placeholder
                        self.flattened_der_indices[k]
                    )
                self.y_train_normalized.append(y_norm)

        elif self.submodel_type == 'ddegp':
            # DDEGP uses directional normalization
            for k, submodel_data in enumerate(self.y_train):
                y_norm, self.mu_y, self.sigma_y, self.sigmas_x, self.mus_x, _ = \
                    utils.normalize_y_data_directional(
                        self.x_train,
                        submodel_data,
                        np.zeros(len(submodel_data)), # placeholder
                        self.flattened_der_indices[k]
                    )
                self.y_train_normalized.append(y_norm)
            # Rescale the shared global rays into normalized input space
            # (divide each input dimension by its scale).
            self.rays = self.rays / self.sigmas_x.flatten()[:, None]
        elif self.submodel_type == 'gddegp':
            # GDDEGP uses directional normalization
            for k, submodel_data in enumerate(self.y_train):
                y_norm, self.mu_y, self.sigma_y, self.sigmas_x, self.mus_x, _ = \
                    utils.normalize_y_data_directional(
                        self.x_train,
                        submodel_data,
                        np.zeros(len(submodel_data)), # placeholder
                        self.flattened_der_indices[k]
                    )
                self.y_train_normalized.append(y_norm)
            # Normalize rays_list using normalize_directions_2
            self.rays_list = [
                utils.normalize_directions_2(self.sigmas_x, sm_rays)
                for sm_rays in self.rays_list
            ]

        self.x_train_normalized = utils.normalize_x_data_train(self.x_train)
    def _precompute_differences(self):
        """Precompute train-train OTI difference structures for the kernel,
        dispatching on ``submodel_type``.

        For GDDEGP this also aggregates per-submodel rays and derivative
        locations into ``self.global_rays`` and
        ``self.global_derivative_locations``, which the kernel routines and
        ``predict`` reuse later.
        """
        x = self.x_train_normalized if self.normalize else self.x_train

        if self.submodel_type == 'degp':
            from jetgp.wdegp import wdegp_utils
            self.differences_by_dim = wdegp_utils.differences_by_dim_func(
                x, x, self.n_order, self.oti
            )
        elif self.submodel_type == 'ddegp':
            from jetgp.full_ddegp import wddegp_utils
            # Use first submodel's derivative_locations as reference
            # (all submodels share same rays structure)
            self.differences_by_dim = wddegp_utils.differences_by_dim_func(
                x, x,
                self.rays,
                self.n_order,
                self.oti,
                return_deriv=True
            )
        elif self.submodel_type == 'gddegp':
            from jetgp.full_gddegp import wgddegp_utils

            # For GDDEGP, combine rays from all submodels into global structure
            # Each submodel may have a different number of direction types
            max_n_dirs = max(len(sm_rays) for sm_rays in self.rays_list)

            global_rays = []
            global_derivative_locations = []

            for dir_idx in range(max_n_dirs):
                # Concatenate rays and locations for this direction from all submodels
                rays_for_dir = []
                locs_for_dir = []
                for sm_idx in range(self.num_submodels):
                    sm_rays = self.rays_list[sm_idx]
                    sm_locs = self.derivative_locations[sm_idx]
                    # Only include if this submodel has this direction type
                    if dir_idx < len(sm_rays):
                        rays_for_dir.append(sm_rays[dir_idx])
                        locs_for_dir.extend(sm_locs[dir_idx])

                if rays_for_dir: # Only add if at least one submodel has this direction
                    global_rays.append(np.hstack(rays_for_dir))
                    global_derivative_locations.append(locs_for_dir)

            # Store global structures for use in kernel computations
            self.global_rays = global_rays
            self.global_derivative_locations = global_derivative_locations

            self.differences_by_dim = wgddegp_utils.differences_by_dim_func(
                x, x,
                global_rays, global_rays,
                global_derivative_locations, global_derivative_locations,
                self.n_order,
                self.oti,
                return_deriv=True
            )
344 def _get_utils_module(self):
345 """Get the appropriate utils module based on submodel_type."""
346 if self.submodel_type == 'degp':
347 from jetgp.wdegp import wdegp_utils
348 return wdegp_utils
349 elif self.submodel_type == 'ddegp':
350 from jetgp.full_ddegp import wddegp_utils
351 return wddegp_utils
352 elif self.submodel_type == 'gddegp':
353 from jetgp.full_gddegp import wgddegp_utils
354 return wgddegp_utils
356 def optimize_hyperparameters(self, *args, **kwargs):
357 """
358 Optimize hyperparameters via the configured optimizer.
360 Returns
361 -------
362 ndarray
363 Optimized hyperparameter vector.
364 """
365 return self.optimizer.optimize_hyperparameters(*args, **kwargs)
    def predict(
        self,
        X_test,
        length_scales,
        calc_cov=False,
        return_deriv=False,
        return_submodels=False,
        rays_predict=None,
        derivs_to_predict=None
    ):
        """
        Compute posterior predictive mean and (optionally) covariance at test points.

        Parameters
        ----------
        X_test : ndarray of shape (n_test, n_features)
            Test input points.
        length_scales : ndarray
            Log-scaled kernel hyperparameters; the last entry is the noise level.
        calc_cov : bool, default=False
            If True, also compute and return predictive covariance.
        return_deriv : bool, default=False
            If True, also predict derivatives (requires rays_predict for GDDEGP).
        return_submodels : bool, default=False
            If True, return submodel-specific contributions.
        rays_predict : list of ndarray, optional
            For 'gddegp' mode with return_deriv=True: rays at test points.
            rays_predict[dir_idx] has shape (d, n_test).
        derivs_to_predict : list, optional
            Specific derivatives to predict. Can include derivatives not present in the
            training set of any submodel — each submodel constructs K_* from kernel
            derivatives directly. If None, defaults to all derivatives common to all
            submodels.

        Returns
        -------
        y_val : ndarray
            Predicted mean values. Shape depends on return_deriv.
        y_var : ndarray, optional
            Predictive variances (only if calc_cov=True).
        submodel_vals : list of ndarrays, optional
            Submodel predictions (only if return_submodels=True).
        submodel_cov : list of ndarrays, optional
            Submodel variances (only if calc_cov and return_submodels are True).
        """
        import warnings

        gp_utils = self._get_utils_module()

        # Split hyperparameters: kernel length scales + log10 noise level.
        ell = length_scales[:-1]
        sigma_n = length_scales[-1]
        n_test = X_test.shape[0]
        n_train = self.x_train.shape[0]  # NOTE(review): n_train is unused below

        # =========================================================================
        # Validation checks for rays_predict parameter
        # =========================================================================

        # Check 1: Warning when rays_predict provided but return_deriv=False
        if not return_deriv and rays_predict is not None:
            warnings.warn(
                "rays_predict provided but return_deriv=False. "
                "The rays will be ignored.",
                UserWarning
            )

        # Check 2: Handle missing rays_predict for GDDEGP/DDEGP with return_deriv=True
        if return_deriv and rays_predict is None:
            if self.submodel_type == 'gddegp':
                # Determine number of ray directions from training
                n_rays = len(self.global_rays)
                warnings.warn(
                    f"No rays_predict provided for GDDEGP with return_deriv=True. "
                    f"Using coordinate axes as default prediction rays ({n_rays} directions).",
                    UserWarning
                )
                # Build default coordinate axis rays.
                # NOTE(review): rows are sized by n_bases, but the validation
                # below requires shape[0] == self.dim — this only works when
                # n_bases == dim; confirm.
                rays_predict = []
                for i in range(n_rays):
                    axis_idx = i % self.n_bases
                    ray_array = np.zeros((self.n_bases, n_test))
                    ray_array[axis_idx, :] = 1.0
                    rays_predict.append(ray_array)

            elif self.submodel_type == 'ddegp':
                # DDEGP uses global rays, no rays_predict needed
                pass

        # Check 3: Validate rays_predict structure when provided and return_deriv=True
        if return_deriv and rays_predict is not None:
            if self.submodel_type == 'gddegp':
                # Validate number of ray directions
                n_expected_rays = len(self.global_rays)
                if len(rays_predict) != n_expected_rays:
                    raise ValueError(
                        f"Number of prediction ray directions ({len(rays_predict)}) "
                        f"does not match training ray directions ({n_expected_rays})"
                    )

                # Validate each ray array
                for i, ray_array in enumerate(rays_predict):
                    if not isinstance(ray_array, np.ndarray):
                        raise TypeError(
                            f"rays_predict[{i}] must be a numpy ndarray, "
                            f"got {type(ray_array).__name__}"
                        )
                    if ray_array.ndim != 2:
                        raise ValueError(
                            f"rays_predict[{i}] must be 2-dimensional, "
                            f"got {ray_array.ndim} dimensions"
                        )
                    if ray_array.shape[0] != self.dim:
                        raise ValueError(
                            f"rays_predict[{i}] has {ray_array.shape[0]} rows, "
                            f"expected {self.dim} (number of input dimensions)"
                        )
                    if ray_array.shape[1] != n_test:
                        raise ValueError(
                            f"rays_predict[{i}] has {ray_array.shape[1]} columns, "
                            f"expected {n_test} (number of test points)"
                        )

            elif self.submodel_type == 'ddegp':
                # For DDEGP, rays_predict should match global rays structure
                if not isinstance(rays_predict, np.ndarray):
                    raise TypeError(
                        f"rays_predict for DDEGP must be a numpy ndarray, "
                        f"got {type(rays_predict).__name__}"
                    )
                if rays_predict.shape[0] != self.dim:
                    raise ValueError(
                        f"rays_predict has {rays_predict.shape[0]} rows, "
                        f"expected {self.dim} (number of input dimensions)"
                    )

            elif self.submodel_type == 'degp':
                # DEGP doesn't use rays, warn if provided
                warnings.warn(
                    "rays_predict provided but submodel_type='degp' does not use rays. "
                    "The rays will be ignored.",
                    UserWarning
                )

        # =========================================================================
        # End of validation checks
        # =========================================================================

        # Normalize test inputs (and GDDEGP prediction rays) into training space.
        if self.normalize:
            X_test_norm = utils.normalize_x_data_test(X_test, self.sigmas_x, self.mus_x)
            if rays_predict is not None and self.submodel_type == 'gddegp':
                rays_predict = utils.normalize_directions_2(
                    self.sigmas_x, rays_predict)
        else:
            X_test_norm = X_test

        x_train = self.x_train_normalized if self.normalize else self.x_train

        # Determine which derivatives to predict across submodels
        if return_deriv:
            if derivs_to_predict is not None:
                common_derivs = derivs_to_predict
            else:
                if self.n_order == 0:
                    raise ValueError(
                        "derivs_to_predict must be provided when predicting derivatives "
                        "from a model trained with n_order=0 (no derivative training data)."
                    )
                common = gp_utils.find_common_derivatives(self.flattened_der_indices)
                common_derivs = [gp_utils.to_list(d) for d in common]
                print('Making predictions for all derivatives that are common among submodels')

            # Determine prediction order from requested derivatives:
            # each deriv spec is a list of (dimension, order) pairs, so the
            # total order is the sum of per-dimension orders.
            required_order = max(
                sum(pair[1] for pair in deriv_spec)
                for deriv_spec in common_derivs
            )
            predict_order = max(required_order, self.n_order)

            # Requested derivatives exceed the training order: build a
            # higher-order OTI module and kernel just for prediction.
            if predict_order > self.n_order:
                if self.submodel_type == 'degp' or self.submodel_type == 'ddegp':
                    predict_oti = get_oti_module(self.n_bases, predict_order)
                elif self.submodel_type == 'gddegp':
                    predict_oti = get_oti_module(2 * self.n_bases, predict_order)
                smoothness_param = getattr(self.kernel_factory, 'alpha', None)
                predict_kernel_factory = KernelFactory(
                    dim=self.dim,
                    normalize=self.normalize,
                    differences_by_dim=self.differences_by_dim,
                    n_order=predict_order,
                    smoothness_parameter=smoothness_param,
                    oti_module=predict_oti
                )
                predict_kernel_func = predict_kernel_factory.create_kernel(
                    kernel_name=self.kernel, kernel_type=self.kernel_type
                )
            else:
                predict_oti = self.oti
                predict_kernel_func = self.kernel_func

            self.powers_predict = utils.build_companion_array_predict(
                self.n_bases, predict_order, common_derivs)
        else:
            common_derivs = []
            self.powers_predict = None
            predict_order = self.n_order
            predict_oti = self.oti
            predict_kernel_func = self.kernel_func

        y_val = 0
        y_var = 0
        submodel_vals = []
        submodel_cov = []

        # For multiple submodels, compute weights (using function-only differences)
        if self.num_submodels > 1:
            if self.submodel_type == 'degp':
                from jetgp.wdegp import wdegp_utils
                diffs_for_weights = wdegp_utils.differences_by_dim_func(
                    X_test_norm, x_train, 0,self.oti, return_deriv=False
                )
            else:
                diffs_for_weights = self._compute_weight_differences(X_test_norm, x_train)

            # Use train-train differences for weight computation
            diffs_train_for_weights = self.differences_by_dim

            weights_matrix = gp_utils.determine_weights(
                diffs_train_for_weights, diffs_for_weights, ell, self.kernel_func, sigma_n
            )

        # Hoist shared computations out of the submodel loop.
        # OTI kernel_func reuses internal buffers, so a second call corrupts
        # the imaginary parts of previously returned OTI objects. We use
        # .copy() to snapshot each phi into independent memory before the
        # next kernel_func call overwrites the shared buffer.
        diffs_train_train = self.differences_by_dim
        diffs_train_test = self._compute_train_test_differences(
            x_train, X_test_norm, return_deriv, rays_predict,
            predict_order=predict_order, predict_oti=predict_oti
        )

        phi_train_train = self.kernel_func(diffs_train_train, ell).copy()
        if self.n_order == 0:
            self.n_bases_rays = 0
            phi_exp_train_train = phi_train_train.real
            phi_exp_train_train = phi_exp_train_train[np.newaxis, :, :]
        else:
            self.n_bases_rays = phi_train_train.get_active_bases()[-1]
            phi_exp_train_train = phi_train_train.get_all_derivs(self.n_bases_rays, 2 * self.n_order)

        phi_train_test = predict_kernel_func(diffs_train_test, ell).copy()
        if predict_order > 0:
            if return_deriv:
                phi_exp_train_test = phi_train_test.get_all_derivs(self.n_bases_rays, 2 * predict_order)
            else:
                phi_exp_train_test = phi_train_test.get_all_derivs(self.n_bases_rays, predict_order)
        else:
            phi_exp_train_test = phi_train_test.real
            phi_exp_train_test = phi_exp_train_test[np.newaxis, :, :]

        # Pre-compute test-test kernel if covariance is requested
        phi_test_test = None
        phi_exp_test_test = None
        if calc_cov:
            diffs_test_test = self._compute_test_test_differences(
                X_test_norm, return_deriv, rays_predict,
                predict_order=predict_order, predict_oti=predict_oti
            )
            phi_test_test = predict_kernel_func(diffs_test_test, ell).copy()
            if predict_order == 0:
                phi_exp_test_test = phi_test_test.real[np.newaxis, :, :]
            else:
                phi_exp_test_test = phi_test_test.get_all_derivs(self.n_bases_rays, 2 * predict_order)

        # Loop over submodels
        for i in range(self.num_submodels):
            deriv_locs_i = self.derivative_locations[i]

            # Build training kernel matrix (per-submodel: different indices/powers)
            K = gp_utils.rbf_kernel(
                phi_train_train, phi_exp_train_train, self.n_order, self.n_bases,
                self.flattened_der_indices[i], self.powers[i],
                index=deriv_locs_i
            )
            # Add the noise jitter (sigma_n is stored as log10 of the level).
            K += (10 ** sigma_n) ** 2 * np.eye(len(K))

            # Solve linear system; fall back to a dense solve if Cholesky fails.
            try:
                cho_solve_failed = False
                L, low = cho_factor(K, lower=True)
                alpha = cho_solve((L, low), self.y_train_normalized[i])
            except Exception as e:  # NOTE(review): e is unused
                cho_solve_failed = True
                alpha = np.linalg.solve(K, self.y_train_normalized[i])
                print('Warning: Cholesky decomposition failed, using standard solve.')

            K_s = gp_utils.rbf_kernel_predictions(
                phi_train_test, phi_exp_train_test, predict_order, self.n_bases_rays,
                self.flattened_der_indices[i], self.powers[i],
                return_deriv=return_deriv,
                index=deriv_locs_i,
                common_derivs=common_derivs,
                powers_predict=self.powers_predict
            )

            # Compute predictive mean.
            # NOTE(review): for gddegp, K_s is transposed and then transposed
            # again in the product, i.e. f_mean == (original K_s) @ alpha,
            # while K_s itself is left transposed for the later variance call
            # — confirm this orientation is what rbf_kernel_predictions
            # returns for gddegp.
            if self.submodel_type == 'gddegp':
                K_s = K_s.T
                f_mean = K_s.T @ alpha
            else:
                f_mean = K_s.T @ alpha
            f_mean = f_mean.reshape(-1, 1)

            # Denormalize predictions
            if self.normalize:
                if return_deriv:
                    if self.submodel_type == 'gddegp' or self.submodel_type == 'ddegp':
                        f_mean = utils.transform_predictions_directional(
                            f_mean, self.mu_y, self.sigma_y, self.sigmas_x,
                            common_derivs, X_test)
                    else:
                        f_mean = utils.transform_predictions(
                            f_mean, self.mu_y, self.sigma_y, self.sigmas_x,
                            common_derivs, X_test
                        )
                else:
                    f_mean = self.mu_y + f_mean * self.sigma_y

            # Reshape predictions to (num_derivs, n_test)
            n = X_test.shape[0]
            m = f_mean.shape[0]
            num_derivs = m // n
            reshaped = f_mean.reshape(num_derivs, n)

            # NOTE(review): this early return fires on the FIRST loop
            # iteration, so for num_submodels > 1 with n_order == 0 only the
            # first submodel's (unweighted) prediction is returned — confirm
            # this is intentional.
            if self.n_order == 0 and not calc_cov:
                return reshaped

            # Apply weights for multiple submodels
            if self.num_submodels > 1:
                # Compute weight for this submodel from the union of its
                # derivative point indices.
                unique_indices = set()
                for subindex in deriv_locs_i:
                    unique_indices.update(subindex)
                unique_indices = sorted(unique_indices)

                weight = np.zeros(weights_matrix.shape[0])
                for idx in unique_indices:
                    weight += weights_matrix[:, idx]

                if return_submodels:
                    submodel_vals.append(reshaped.copy())

                reshaped_weighted = reshaped * weight
            else:
                reshaped_weighted = reshaped
                if return_submodels:
                    raise ValueError('Cannot return submodels for a single model')

            y_val += reshaped_weighted

            # Compute covariance if requested
            if calc_cov:
                f_var = self._compute_predictive_variance(
                    X_test_norm, deriv_locs_i, common_derivs, self.powers_predict,
                    ell, i, K, K_s, L if not cho_solve_failed else None,
                    low if not cho_solve_failed else None, cho_solve_failed,
                    return_deriv, rays_predict,
                    predict_order=predict_order, predict_oti=predict_oti,
                    predict_kernel_func=predict_kernel_func,
                    phi_test_test_cached=phi_test_test,
                    phi_exp_test_test_cached=phi_exp_test_test
                )

                if self.num_submodels > 1:
                    f_var_reshaped = f_var.reshape(num_derivs, n)
                    # NOTE(review): another first-iteration early return when
                    # n_order == 0 — see note above.
                    if self.n_order == 0:
                        return reshaped, f_var_reshaped
                    if return_submodels:
                        submodel_cov.append(f_var_reshaped.copy())
                    f_var_reshaped = f_var_reshaped * weight
                    y_var += f_var_reshaped
                else:
                    y_var = f_var.reshape(num_derivs, n)

        # Return results. y_var holds standard deviations (see
        # _compute_predictive_variance, which returns sqrt), so squaring
        # here converts back to variances.
        if self.num_submodels == 1:
            if calc_cov:
                return (y_val, y_var ** 2)
            return y_val
        else:
            if return_submodels:
                if calc_cov:
                    return (y_val, y_var ** 2, submodel_vals, submodel_cov)
                return (y_val, submodel_vals)
            else:
                if calc_cov:
                    return (y_val, y_var ** 2)
                return y_val
767 def _compute_train_test_differences(self, x_train, X_test, return_deriv, rays_predict,
768 submodel_idx=0, predict_order=None, predict_oti=None):
769 """Compute train-test differences based on submodel_type."""
770 p_order = predict_order if predict_order is not None else self.n_order
771 p_oti = predict_oti if predict_oti is not None else self.oti
772 if self.submodel_type == 'degp':
773 from jetgp.wdegp import wdegp_utils
774 return wdegp_utils.differences_by_dim_func(
775 x_train, X_test, p_order, p_oti, return_deriv=return_deriv
776 )
777 elif self.submodel_type == 'ddegp':
778 from jetgp.full_ddegp import wddegp_utils
779 deriv_locs_test = [list(range(len(X_test)))] * self.rays.shape[1] if return_deriv else None
780 return wddegp_utils.differences_by_dim_func(
781 x_train, X_test,
782 self.rays,
783 p_order,
784 p_oti,
785 return_deriv=return_deriv
786 )
787 elif self.submodel_type == 'gddegp':
788 from jetgp.full_gddegp import gddegp_utils
789 n_dirs = len(self.global_rays)
790 rays_test = rays_predict if return_deriv and rays_predict is not None else None
791 deriv_locs_test = [list(range(len(X_test)))] * n_dirs if return_deriv else None
792 return gddegp_utils.differences_by_dim_func(
793 x_train, X_test,
794 self.global_rays, rays_test,
795 self.global_derivative_locations, deriv_locs_test,
796 p_order,
797 p_oti,
798 return_deriv=return_deriv
799 )
801 def _compute_weight_differences(self, X_test, x_train):
802 """Compute differences for weight calculation (always without derivatives)."""
803 if self.submodel_type == 'degp':
804 from jetgp.wdegp import wdegp_utils
805 return wdegp_utils.differences_by_dim_func(X_test, x_train, 0,self.oti, return_deriv=False)
806 elif self.submodel_type == 'ddegp':
807 from jetgp.full_ddegp import wddegp_utils
808 return wddegp_utils.differences_by_dim_func(
809 X_test, x_train, self.rays, 0,self.oti, return_deriv=False
810 )
811 elif self.submodel_type == 'gddegp':
812 from jetgp.full_gddegp import wgddegp_utils
813 return wgddegp_utils.differences_by_dim_func(
814 X_test, x_train, None, None, None, None, 0,self.oti, return_deriv=False
815 )
817 def _compute_predictive_variance(
818 self, X_test, deriv_locs_i, common_derivs, powers_predict, ell, submodel_idx, K, K_s, L, low, cho_solve_failed,
819 return_deriv, rays_predict, predict_order=None, predict_oti=None, predict_kernel_func=None,
820 phi_test_test_cached=None, phi_exp_test_test_cached=None
821 ):
822 """Compute predictive variance for a submodel."""
823 gp_utils = self._get_utils_module()
824 p_order = predict_order if predict_order is not None else self.n_order
826 # Use pre-computed test-test kernel if available, otherwise compute
827 if phi_test_test_cached is not None:
828 phi_test_test = phi_test_test_cached
829 phi_exp_test_test = phi_exp_test_test_cached
830 else:
831 p_kernel_func = predict_kernel_func if predict_kernel_func is not None else self.kernel_func
832 diffs_test_test = self._compute_test_test_differences(
833 X_test, return_deriv, rays_predict, submodel_idx=submodel_idx,
834 predict_order=p_order, predict_oti=predict_oti
835 )
836 phi_test_test = p_kernel_func(diffs_test_test, ell).copy()
837 if p_order == 0:
838 phi_exp_test_test = phi_test_test.real[np.newaxis, :, :]
839 else:
840 bases = phi_test_test.get_active_bases()
841 n_bases = bases[-1]
842 phi_exp_test_test = phi_test_test.get_all_derivs(n_bases, 2 * p_order)
844 deriv_locs_test = [list(range(len(X_test)))] * len(self.derivative_locations[submodel_idx]) if return_deriv else None
846 K_ss = gp_utils.rbf_kernel_predictions(
847 phi_test_test, phi_exp_test_test, p_order, self.n_bases_rays,
848 self.flattened_der_indices[submodel_idx], self.powers[submodel_idx],
849 return_deriv=return_deriv,
850 index=deriv_locs_test,
851 common_derivs=common_derivs,
852 calc_cov=True,
853 powers_predict=powers_predict
854 )
856 n_test = len(X_test)
858 if cho_solve_failed:
859 f_cov = K_ss - K_s.T @ np.linalg.solve(K, K_s)
861 else:
862 v = solve_triangular(L, K_s, lower=low)
863 f_cov = K_ss - v.T @ v
865 if self.normalize:
866 if self.submodel_type == 'gddegp' or 'ddegp':
867 f_var = utils.transform_cov_directional(
868 f_cov, self.sigma_y, self.sigmas_x,
869 common_derivs, X_test)
870 else:
871 f_var = utils.transform_cov(
872 f_cov, self.sigma_y, self.sigmas_x,
873 common_derivs, X_test
874 )
875 else:
876 f_var = np.diag(np.abs(f_cov))
878 return np.sqrt(f_var)
880 def _compute_test_test_differences(self, X_test, return_deriv, rays_predict,
881 submodel_idx=0, predict_order=None, predict_oti=None):
882 """Compute test-test differences for covariance."""
883 p_order = predict_order if predict_order is not None else self.n_order
884 p_oti = predict_oti if predict_oti is not None else self.oti
885 if self.submodel_type == 'degp':
886 from jetgp.wdegp import wdegp_utils
887 return wdegp_utils.differences_by_dim_func(
888 X_test, X_test, p_order, p_oti, return_deriv=return_deriv
889 )
890 elif self.submodel_type == 'ddegp':
891 from jetgp.full_ddegp import wddegp_utils
892 deriv_locs = [list(range(len(X_test)))] * self.rays.shape[1] if return_deriv else None
893 return wddegp_utils.differences_by_dim_func(
894 X_test, X_test,
895 self.rays,
896 p_order,
897 p_oti,
898 return_deriv=return_deriv
899 )
900 elif self.submodel_type == 'gddegp':
901 from jetgp.full_gddegp import wgddegp_utils
902 rays_test = rays_predict if return_deriv else None
903 n_dirs = len(self.global_rays) if rays_test is None else len(rays_test)
904 deriv_locs = [list(range(len(X_test)))] * n_dirs if return_deriv else None
905 return wgddegp_utils.differences_by_dim_func(
906 X_test, X_test,
907 rays_test, rays_test,
908 deriv_locs, deriv_locs,
909 p_order,
910 p_oti,
911 return_deriv=return_deriv
912 )