Coverage for jetgp/hyperparameter_optimizers/adam.py: 92%

66 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2026-03-31 11:46 -0500

1import numpy as np 

2 

def adam(func, lb, ub, **kwargs):
    """ADAM optimizer - pure NumPy implementation for performance.

    Minimizes ``func`` over the box ``[lb, ub]`` using Adam with random
    restarts. Works with custom types like sparse matrices, since the only
    requirement on ``func`` is that it returns a scalar value (or a
    ``(value, gradient)`` tuple).

    Parameters
    ----------
    func : callable
        Objective, called as ``func(x)``. May return either a scalar or a
        ``(value, gradient)`` tuple; when no gradient is supplied, a
        forward-difference approximation is used.
    lb, ub : array_like
        Lower / upper bounds, one entry per dimension.
    **kwargs
        x0 : array_like, optional
            Starting point for the first restart (clipped into the bounds).
        n_restart_optimizer : int, default 10
            Number of restarts; all but the first start uniformly at random.
        maxiter : int, default 1000
            Maximum iterations per restart.
        learning_rate : float, default 0.001
        beta1, beta2 : float, defaults 0.9 / 0.999
            Exponential decay rates for the moment estimates.
        epsilon : float, default 1e-8
            Numerical stabilizer in the update denominator.
        ftol, gtol : float, defaults 1e-8
            Convergence tolerances on the objective change and gradient
            norm; BOTH must hold to stop early.
        debug, disp : bool, default False
            Diagnostic printing (per-restart / per-100-iterations).

    Returns
    -------
    tuple of (np.ndarray, float)
        ``(best_x, best_val)``: the best point found across all restarts
        and its objective value.

    Raises
    ------
    ValueError
        If ``n_restart_optimizer`` is less than 1 (no restart would run
        and the result would be meaningless).
    """
    x0 = kwargs.pop("x0", None)
    num_restart_optimizer = kwargs.pop("n_restart_optimizer", 10)
    maxiter = kwargs.pop("maxiter", 1000)
    learning_rate = kwargs.pop("learning_rate", 0.001)
    beta1 = kwargs.pop("beta1", 0.9)
    beta2 = kwargs.pop("beta2", 0.999)
    epsilon = kwargs.pop("epsilon", 1e-8)
    ftol = kwargs.pop("ftol", 1e-8)
    gtol = kwargs.pop("gtol", 1e-8)
    debug = kwargs.pop("debug", False)
    disp = kwargs.pop("disp", False)

    # Guard against a silent (None, inf) result when no restart would run.
    if num_restart_optimizer < 1:
        raise ValueError("n_restart_optimizer must be >= 1")

    lb = np.asarray(lb, dtype=np.float64)
    ub = np.asarray(ub, dtype=np.float64)
    n_dim = len(lb)

    def forward_gradient(f, x, f_x, h=1e-7):
        """Forward-difference gradient reusing f(x): n evals, not 2n."""
        grad = np.empty(n_dim)
        x_pert = x.copy()
        for i in range(n_dim):
            x_pert[i] += h
            f_plus = f(x_pert)
            if isinstance(f_plus, tuple):
                f_plus = f_plus[0]
            grad[i] = (f_plus - f_x) / h
            x_pert[i] = x[i]  # Reset in-place; avoids re-copying x per dim
        return grad

    best_x = None
    best_val = np.inf

    for restart in range(num_restart_optimizer):
        # First restart honors the user-supplied start; clip it so the
        # search never begins outside the feasible box.
        if x0 is not None and restart == 0:
            x = np.clip(np.asarray(x0, dtype=np.float64), lb, ub)
        else:
            x = np.random.uniform(lb, ub)

        # Adam state: first / second raw moment estimates (pure NumPy).
        m = np.zeros(n_dim)
        v = np.zeros(n_dim)

        prev_val = np.inf

        for t in range(1, maxiter + 1):  # Start at 1 for bias correction
            # Evaluate objective (and gradient, if func provides one).
            result = func(x)
            if isinstance(result, tuple):
                f_val, grad = result[0], result[1]
            else:
                f_val = result
                grad = forward_gradient(func, x, f_val)

            # Converged when both the objective decrease and the gradient
            # norm are below tolerance; skipped at t == 1 (no prev value).
            grad_norm = np.linalg.norm(grad)
            if t > 1:
                f_diff = abs(prev_val - f_val)
                if f_diff < ftol and grad_norm < gtol:
                    if disp:
                        print(f"Converged at iteration {t}")
                    break

            prev_val = f_val

            # Adam update with bias-corrected moment estimates.
            m = beta1 * m + (1.0 - beta1) * grad
            v = beta2 * v + (1.0 - beta2) * (grad * grad)
            m_hat = m / (1.0 - beta1 ** t)
            v_hat = v / (1.0 - beta2 ** t)
            x = x - learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)

            # Project back onto the bounds (in-place).
            np.clip(x, lb, ub, out=x)

            if disp and t % 100 == 0:
                print(f"Iteration {t}: f(x) = {f_val:.6e}, ||grad|| = {grad_norm:.6e}")

        # Re-evaluate: x moved after the last f_val was computed.
        result = func(x)
        final_val = result[0] if isinstance(result, tuple) else result

        if final_val < best_val:
            best_val = final_val
            best_x = x.copy()

        if debug:
            print(f"[ADAM] Restart {restart+1}/{num_restart_optimizer} -> best_val={best_val}")

    return best_x, best_val