Coverage for jetgp/hyperparameter_optimizers/adam.py: 92%

66 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2026-03-31 11:46 -0500

1import numpy as np 

2 

def adam(func, lb, ub, **kwargs):
    """ADAM optimizer - pure NumPy implementation for performance.

    Minimizes ``func`` over the box ``[lb, ub]`` using Adam with random
    restarts. Works with custom types like sparse matrices, since the only
    requirement on ``func`` is that it returns a scalar value (or a
    ``(value, gradient)`` tuple).

    Parameters
    ----------
    func : callable
        Objective, called as ``func(x)``. May return either a scalar or a
        ``(value, gradient)`` tuple; when no gradient is supplied, a
        forward-difference approximation is used.
    lb, ub : array_like
        Lower / upper bounds, one entry per dimension.
    **kwargs
        x0 : array_like, optional
            Starting point for the first restart (clipped into the bounds).
        n_restart_optimizer : int, default 10
            Number of restarts; all but the first start uniformly at random.
        maxiter : int, default 1000
            Maximum iterations per restart.
        learning_rate : float, default 0.001
        beta1, beta2 : float, defaults 0.9 / 0.999
            Exponential decay rates for the moment estimates.
        epsilon : float, default 1e-8
            Numerical stabilizer in the update denominator.
        ftol, gtol : float, defaults 1e-8
            Convergence tolerances on the objective change and gradient
            norm; BOTH must hold to stop early.
        debug, disp : bool, default False
            Diagnostic printing (per-restart / per-100-iterations).

    Returns
    -------
    tuple of (np.ndarray, float)
        ``(best_x, best_val)``: the best point found across all restarts
        and its objective value.

    Raises
    ------
    ValueError
        If ``n_restart_optimizer`` is less than 1 (no restart would run
        and the result would be meaningless).
    """
    x0 = kwargs.pop("x0", None)
    num_restart_optimizer = kwargs.pop("n_restart_optimizer", 10)
    maxiter = kwargs.pop("maxiter", 1000)
    learning_rate = kwargs.pop("learning_rate", 0.001)
    beta1 = kwargs.pop("beta1", 0.9)
    beta2 = kwargs.pop("beta2", 0.999)
    epsilon = kwargs.pop("epsilon", 1e-8)
    ftol = kwargs.pop("ftol", 1e-8)
    gtol = kwargs.pop("gtol", 1e-8)
    debug = kwargs.pop("debug", False)
    disp = kwargs.pop("disp", False)

    # Guard against a silent (None, inf) result when no restart would run.
    if num_restart_optimizer < 1:
        raise ValueError("n_restart_optimizer must be >= 1")

    lb = np.asarray(lb, dtype=np.float64)
    ub = np.asarray(ub, dtype=np.float64)
    n_dim = len(lb)

    def forward_gradient(f, x, f_x, h=1e-7):
        """Forward-difference gradient reusing f(x): n evals, not 2n."""
        grad = np.empty(n_dim)
        x_pert = x.copy()
        for i in range(n_dim):
            x_pert[i] += h
            f_plus = f(x_pert)
            if isinstance(f_plus, tuple):
                f_plus = f_plus[0]
            grad[i] = (f_plus - f_x) / h
            x_pert[i] = x[i]  # Reset in-place; avoids re-copying x per dim
        return grad

    best_x = None
    best_val = np.inf

    for restart in range(num_restart_optimizer):
        # First restart honors the user-supplied start; clip it so the
        # search never begins outside the feasible box.
        if x0 is not None and restart == 0:
            x = np.clip(np.asarray(x0, dtype=np.float64), lb, ub)
        else:
            x = np.random.uniform(lb, ub)

        # Adam state: first / second raw moment estimates (pure NumPy).
        m = np.zeros(n_dim)
        v = np.zeros(n_dim)

        prev_val = np.inf

        for t in range(1, maxiter + 1):  # Start at 1 for bias correction
            # Evaluate objective (and gradient, if func provides one).
            result = func(x)
            if isinstance(result, tuple):
                f_val, grad = result[0], result[1]
            else:
                f_val = result
                grad = forward_gradient(func, x, f_val)

            # Converged when both the objective decrease and the gradient
            # norm are below tolerance; skipped at t == 1 (no prev value).
            grad_norm = np.linalg.norm(grad)
            if t > 1:
                f_diff = abs(prev_val - f_val)
                if f_diff < ftol and grad_norm < gtol:
                    if disp:
                        print(f"Converged at iteration {t}")
                    break

            prev_val = f_val

            # Adam update with bias-corrected moment estimates.
            m = beta1 * m + (1.0 - beta1) * grad
            v = beta2 * v + (1.0 - beta2) * (grad * grad)
            m_hat = m / (1.0 - beta1 ** t)
            v_hat = v / (1.0 - beta2 ** t)
            x = x - learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)

            # Project back onto the bounds (in-place).
            np.clip(x, lb, ub, out=x)

            if disp and t % 100 == 0:
                print(f"Iteration {t}: f(x) = {f_val:.6e}, ||grad|| = {grad_norm:.6e}")

        # Re-evaluate: x moved after the last f_val was computed.
        result = func(x)
        final_val = result[0] if isinstance(result, tuple) else result

        if final_val < best_val:
            best_val = final_val
            best_x = x.copy()

        if debug:
            print(f"[ADAM] Restart {restart+1}/{num_restart_optimizer} -> best_val={best_val}")

    return best_x, best_val