Coverage for jetgp/hyperparameter_optimizers/lbfgs.py: 80%

59 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2026-04-02 14:19 -0500

1import numpy as np 

2from scipy.optimize import minimize 

3from scipy.spatial.distance import cdist 

4from scipy.stats import qmc # For quasi-random sequences 

5 

6 

def lbfgs_smart(func, lb, ub, **kwargs):
    """
    L-BFGS-B optimizer with multiple restarts from smartly chosen start points.

    Supported starting-point strategies:
    - 'lhs' (default): Latin Hypercube Sampling for space-filling coverage
    - 'sobol': Sobol quasi-random sequence for low-discrepancy coverage
    - anything else: uniform random sampling

    NOTE(review): 'exclusion' and 'adaptive' strategies were previously
    advertised here but are NOT implemented — any strategy other than
    'lhs'/'sobol' falls back to uniform random restarts.

    Parameters
    ----------
    func : callable
        Function to minimize; returns a scalar objective value.
    lb, ub : array-like
        Lower and upper bounds, one entry per dimension.
    kwargs : dict
        - x0 : initial guess used for the first restart only (optional)
        - n_restart_optimizer : number of restarts (default=10)
        - strategy : 'random', 'lhs', 'sobol' (default='lhs')
        - func_and_grad : callable returning (f, g) — preferred, one call
          (one Cholesky) per optimizer step
        - grad_func : separate gradient callable (back-compat fallback,
          two calls per step)
        - maxiter, ftol, gtol, debug, disp : standard L-BFGS-B options
        - exclusion_radius, max_rejection : accepted and discarded for
          backward compatibility; currently unused.

    Returns
    -------
    (best_x, best_val) : tuple
        Best point found (ndarray) and its objective value.
    """
    x0 = kwargs.pop("x0", None)
    n_restarts = kwargs.pop("n_restart_optimizer", 10)
    strategy = kwargs.pop("strategy", "lhs")
    # Popped but unused: kept so existing callers passing these don't break.
    kwargs.pop("exclusion_radius", 0.1)
    kwargs.pop("max_rejection", 100)
    maxiter = kwargs.pop("maxiter", 200)
    ftol = kwargs.pop("ftol", 1e-8)
    gtol = kwargs.pop("gtol", 1e-8)
    debug = kwargs.pop("debug", False)
    disp = kwargs.pop("disp", False)
    func_and_grad = kwargs.pop("func_and_grad", None)  # returns (f, g) — preferred
    grad_func = kwargs.pop("grad_func", None)  # fallback: separate gradient callable

    lb = np.asarray(lb, dtype=float)
    ub = np.asarray(ub, dtype=float)
    ndim = len(lb)

    best_x = None
    best_val = np.inf

    # Pre-generate starting points for the quasi-random strategies.
    # Fixed seed keeps restarts reproducible across calls.
    if strategy == 'lhs':
        sampler = qmc.LatinHypercube(d=ndim, seed=42)
        starting_points = qmc.scale(sampler.random(n=n_restarts), lb, ub)
    elif strategy == 'sobol':
        sampler = qmc.Sobol(d=ndim, seed=42)
        starting_points = qmc.scale(sampler.random(n=n_restarts), lb, ub)
    else:
        starting_points = None  # drawn uniformly at random, on the fly

    # Loop-invariant: same options and bounds for every restart.
    opts = {"maxiter": maxiter, "ftol": ftol, "gtol": gtol, "disp": disp}
    bounds = list(zip(lb, ub))

    for i in range(n_restarts):
        # A user-supplied x0 takes precedence on the first restart only.
        if x0 is not None and i == 0:
            x_init = np.asarray(x0, dtype=float)
        elif starting_points is not None:
            x_init = starting_points[i]
        else:  # random
            x_init = np.random.uniform(lb, ub)

        if callable(func_and_grad):
            # Single call returns (f, g) — one Cholesky per step.
            res = minimize(func_and_grad, x_init, method="L-BFGS-B", jac=True,
                           bounds=bounds, options=opts)
        elif callable(grad_func):
            # Separate calls — two Cholesky per step (kept for back-compat).
            def _fg(x):
                return func(x), grad_func(x)
            res = minimize(_fg, x_init, method="L-BFGS-B", jac=True,
                           bounds=bounds, options=opts)
        else:
            # No gradient available: L-BFGS-B falls back to finite differences.
            res = minimize(func, x_init, method="L-BFGS-B",
                           bounds=bounds, options=opts)

        # Keep the best optimum seen across restarts.
        if res.fun < best_val:
            best_val = res.fun
            best_x = res.x

        if debug:
            print(f"[L-BFGS-B {strategy}] Restart {i+1}/{n_restarts} | "
                  f"f={res.fun:.6e} | best={best_val:.6e} | ")

    return best_x, best_val

110 

111 

112 

113# ============================================================================= 

114# Unified interface 

115# ============================================================================= 

116 

def lbfgs(func, lb, ub, **kwargs):
    """
    L-BFGS-B optimizer with restart strategies.

    Thin wrapper that forwards everything unchanged to ``lbfgs_smart``.

    Parameters
    ----------
    func : callable
        Function to minimize.
    lb, ub : array-like
        Lower and upper bounds.
    kwargs : dict
        Forwarded to ``lbfgs_smart``:
        - strategy : 'random', 'lhs', 'sobol' (default='lhs')
        - n_restart_optimizer : number of restarts (default=10)
        - x0 : initial guess for first restart
        - maxiter, ftol, gtol, debug, disp : L-BFGS-B options

    Returns
    -------
    (best_x, best_val) : tuple
        Best point found and its objective value.

    Notes
    -----
    NOTE(review): 'exclusion', 'adaptive', and 'clustering' strategies were
    previously advertised here (with a claimed default of 'adaptive'), but
    they are not implemented in ``lbfgs_smart``; unrecognized strategies
    fall back to uniform random restarts and the actual default is 'lhs'.
    """
    return lbfgs_smart(func, lb, ub, **kwargs)

146 

147 

148 

149# ============================================================================= 

150# Example usage and comparison 

151# =============================================================================