Coverage for jetgp/hyperparameter_optimizers/adam.py: 92%
66 statements
« prev ^ index » next    coverage.py v7.10.7, created at 2026-03-31 11:46 -0500
import numpy as np
def adam(func, lb, ub, **kwargs):
    """
    Minimize ``func`` over the box ``[lb, ub]`` with the Adam optimizer.

    Pure-NumPy implementation (no SciPy), so it also works with objectives
    built on custom types such as sparse matrices.

    Parameters
    ----------
    func : callable
        Objective. May return either a scalar ``f(x)`` or a tuple
        ``(f(x), grad(x))``. When no gradient is returned, a forward
        finite-difference gradient is used.
    lb, ub : array_like
        Elementwise lower / upper bounds; their length defines the dimension.
    **kwargs
        x0 : array_like, optional
            Start point for the first restart (projected into ``[lb, ub]``).
        n_restart_optimizer : int, default 10
            Number of restarts; all but the first start uniformly at random.
        maxiter : int, default 1000
            Maximum iterations per restart.
        learning_rate, beta1, beta2, epsilon : float
            Standard Adam hyperparameters.
        ftol, gtol : float, default 1e-8
            Convergence tolerances on ``|f_k - f_{k-1}|`` and ``||grad||``;
            both must be met to stop early.
        debug, disp : bool, default False
            Per-restart / per-iteration diagnostic printing.

    Returns
    -------
    tuple
        ``(best_x, best_val)`` — the best point found across all restarts
        and its objective value.
    """
    x0 = kwargs.pop("x0", None)
    num_restart_optimizer = kwargs.pop("n_restart_optimizer", 10)
    maxiter = kwargs.pop("maxiter", 1000)
    learning_rate = kwargs.pop("learning_rate", 0.001)
    beta1 = kwargs.pop("beta1", 0.9)
    beta2 = kwargs.pop("beta2", 0.999)
    epsilon = kwargs.pop("epsilon", 1e-8)
    ftol = kwargs.pop("ftol", 1e-8)
    gtol = kwargs.pop("gtol", 1e-8)
    debug = kwargs.pop("debug", False)
    disp = kwargs.pop("disp", False)

    lb = np.asarray(lb, dtype=np.float64)
    ub = np.asarray(ub, dtype=np.float64)
    n_dim = len(lb)

    def forward_gradient(f, x, f_x, h=1e-7):
        """
        Forward-difference gradient that reuses the already-computed f(x):
        n extra evaluations instead of 2n for central differences.
        """
        grad = np.empty(n_dim)
        x_pert = x.copy()
        for i in range(n_dim):
            x_pert[i] += h
            f_plus = f(x_pert)
            if isinstance(f_plus, tuple):
                f_plus = f_plus[0]
            grad[i] = (f_plus - f_x) / h
            x_pert[i] = x[i]  # reset the perturbed coordinate in place
        return grad

    best_x = None
    best_val = np.inf

    for restart in range(num_restart_optimizer):
        # Starting point: user-supplied x0 on the first restart, otherwise
        # uniform random inside the box.
        if x0 is not None and restart == 0:
            x = np.array(x0, dtype=np.float64)
            # BUG FIX: project x0 into [lb, ub]. The original evaluated the
            # raw start point, so an out-of-bounds x0 could be evaluated and
            # even returned as best_x before the first clipped update.
            np.clip(x, lb, ub, out=x)
        else:
            x = np.random.uniform(lb, ub)

        # Adam state (pure NumPy)
        m = np.zeros(n_dim)  # first moment estimate (EMA of gradients)
        v = np.zeros(n_dim)  # second moment estimate (EMA of squared gradients)

        prev_val = np.inf

        for t in range(1, maxiter + 1):  # start at 1 for bias correction
            # Evaluate objective (and gradient, if func provides one).
            result = func(x)
            if isinstance(result, tuple):
                f_val, grad = result[0], result[1]
            else:
                f_val = result
                grad = forward_gradient(func, x, f_val)

            # Converged when both the objective change and the gradient norm
            # fall below tolerance (skipped on the very first iterate, which
            # has no previous value to compare against).
            grad_norm = np.linalg.norm(grad)
            if t > 1:
                f_diff = abs(prev_val - f_val)
                if f_diff < ftol and grad_norm < gtol:
                    if disp:
                        print(f"Converged at iteration {t}")
                    break

            prev_val = f_val

            # Adam update with bias-corrected moment estimates.
            m = beta1 * m + (1.0 - beta1) * grad
            v = beta2 * v + (1.0 - beta2) * (grad * grad)
            m_hat = m / (1.0 - beta1 ** t)
            v_hat = v / (1.0 - beta2 ** t)
            x = x - learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)

            # Project back onto the feasible box (in place).
            np.clip(x, lb, ub, out=x)

            if disp and t % 100 == 0:
                print(f"Iteration {t}: f(x) = {f_val:.6e}, ||grad|| = {grad_norm:.6e}")

        # Evaluate at the final iterate and keep the best across restarts.
        result = func(x)
        final_val = result[0] if isinstance(result, tuple) else result

        if final_val < best_val:
            best_val = final_val
            best_x = x.copy()

        if debug:
            print(f"[ADAM] Restart {restart+1}/{num_restart_optimizer} -> best_val={best_val}")

    return best_x, best_val