diff --git a/emukit/core/optimization/gradient_acquisition_optimizer.py b/emukit/core/optimization/gradient_acquisition_optimizer.py index efcee0c0..fb7f4ead 100644 --- a/emukit/core/optimization/gradient_acquisition_optimizer.py +++ b/emukit/core/optimization/gradient_acquisition_optimizer.py @@ -51,14 +51,14 @@ def _optimize(self, acquisition: Acquisition, context_manager: ContextManager) - x = np.array(context_manager.context_values)[None, :] return x, f(x) + # Prepare gradient function (if available) if acquisition.has_gradients: - - def f_df(x): + def df(x): f_value, df_value = acquisition.evaluate_with_gradients(x) - return -f_value, -df_value - + # Return negated gradient (since we negated f) + return -df_value else: - f_df = None + df = None optimizer = self._get_optimizer(context_manager) anchor_points_generator = ObjectiveAnchorPointsGenerator(self.space, acquisition, num_samples=self.num_samples) @@ -70,7 +70,7 @@ def f_df(x): optimized_points = [] for a in anchor_points: optimized_point = apply_optimizer( - optimizer, a, space=self.space, f=f, df=None, f_df=f_df, context_manager=context_manager + optimizer, a, space=self.space, f=f, df=df, context_manager=context_manager ) optimized_points.append(optimized_point) diff --git a/emukit/core/optimization/optimizer.py b/emukit/core/optimization/optimizer.py index cd87b652..de3c5757 100644 --- a/emukit/core/optimization/optimizer.py +++ b/emukit/core/optimization/optimizer.py @@ -5,7 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 -from typing import Callable, List, Tuple +from typing import Callable, List, Optional, Tuple import numpy as np import scipy.optimize @@ -27,21 +27,22 @@ def __init__(self, bounds: List[Tuple]): self.bounds = bounds def optimize( - self, x0: np.ndarray, f: Callable = None, df: Callable = None, f_df: Callable = None + self, x0: np.ndarray, f: Callable, df: Optional[Callable] = None ) -> Tuple[np.ndarray, np.ndarray]: """ - :param x0: initial point for a local optimizer. - :param f: function to optimize. - :param df: gradient of the function to optimize. - :param f_df: returns both the function to optimize and its gradient. - :return: Location of optimum and value at optimum + Optimize f starting from x0. + + :param x0: Initial point for optimization + :param f: Objective function to minimize (required) + :param df: Gradient of f (optional; if None, gradient will be approximated) + :return: Tuple of (location of optimum, value at optimum) """ raise NotImplementedError("The optimize method is not implemented in the parent class.") class OptLbfgs(Optimizer): """ - Wrapper for l-bfgs-b to use the true or the approximate gradients. + Wrapper for L-BFGS-B optimizer using true or approximate gradients. """ def __init__(self, bounds, max_iterations=1000): @@ -49,31 +50,59 @@ def __init__(self, bounds, max_iterations=1000): self.max_iterations = max_iterations def optimize( - self, x0: np.ndarray, f: Callable = None, df: Callable = None, f_df: Callable = None + self, x0: np.ndarray, f: Callable, df: Optional[Callable] = None ) -> Tuple[np.ndarray, np.ndarray]: """ - :param x0: initial point for a local optimizer. - :param f: function to optimize. - :param df: gradient of the function to optimize. - :param f_df: returns both the function to optimize and its gradient. - :return: Location of optimum and value at optimum - """ + Minimize f using L-BFGS-B. - if f_df is None and df is not None: - f_df = lambda x: (float(f(x)), df(x)) - if f_df is not None: - - def _f_df(x): - return f(x), f_df(x)[1][0] + :param x0: Initial point for optimization + :param f: Objective function to minimize (required) + :param df: Gradient of f (optional; if None, gradient will be approximated) + :return: Tuple of (location of optimum, value at optimum) + """ - if f_df is None and df is None: + if df is not None: + # Use provided gradient + def f_and_grad(x): + x_2d = np.atleast_2d(x) + f_val_raw = f(x_2d) + # Extract scalar value, handling 0-d arrays + if np.isscalar(f_val_raw): + f_val = float(f_val_raw) + else: + f_arr = np.asarray(f_val_raw).squeeze() + f_val = float(f_arr) if f_arr.ndim == 0 else float(f_arr.flat[0]) + + grad = df(x_2d) + # Handle gradient shape - could be (n_dims, n_samples) or (n_samples, n_dims) + if grad.shape[0] == 1: + grad_val = grad[0] # Shape (n_dims,) + elif grad.shape[1] == 1: + grad_val = grad[:, 0] # Shape (n_dims,) from transpose + else: + grad_val = grad[0] # Assume (n_samples, n_dims), take first + return f_val, grad_val + res = scipy.optimize.fmin_l_bfgs_b( - f, x0=x0, bounds=self.bounds, approx_grad=True, maxiter=self.max_iterations + f_and_grad, x0=x0, bounds=self.bounds, maxiter=self.max_iterations ) else: - res = scipy.optimize.fmin_l_bfgs_b(_f_df, x0=x0, bounds=self.bounds, maxiter=self.max_iterations) + # Approximate gradient using finite differences + def f_wrapped(x): + x_2d = np.atleast_2d(x) + f_val_raw = f(x_2d) + # Extract scalar value, handling 0-d arrays + if np.isscalar(f_val_raw): + return float(f_val_raw) + else: + f_arr = np.asarray(f_val_raw).squeeze() + return float(f_arr) if f_arr.ndim == 0 else float(f_arr.flat[0]) + + res = scipy.optimize.fmin_l_bfgs_b( + f_wrapped, x0=x0, bounds=self.bounds, approx_grad=True, maxiter=self.max_iterations + ) - # We check here if the the optimizer moved. It it didn't we report x0 and f(x0) as scipy can return NaNs + # Handle abnormal termination if res[2]["task"] == b"ABNORMAL_TERMINATION_IN_LNSRCH": result_x = np.atleast_2d(x0) result_fx = np.atleast_2d(f(x0)) @@ -88,132 +117,113 @@ def apply_optimizer( optimizer: Optimizer, x0: np.ndarray, space: ParameterSpace, - f: Callable = None, - df: Callable = None, - f_df: Callable = None, + f: Callable, + df: Optional[Callable] = None, context_manager: ContextManager = None, ) -> Tuple[np.ndarray, np.ndarray]: """ - Optimizes f using the optimizer supplied, deals with potential context variables. - - :param optimizer: The optimizer object that will perform the optimization - :param x0: initial point for a local optimizer (x0 can be defined with or without the context included). - :param f: function to optimize. - :param df: gradient of the function to optimize. - :param f_df: returns both the function to optimize and its gradient. - :param context_manager: If provided, x0 (and the optimizer) operates in the space without the context - :param space: Parameter space describing input domain, including any context variables - :return: Location of optimum and value at optimum + Optimize f using the provided optimizer, handling context variables. + + :param optimizer: The optimizer to use + :param x0: Initial point (with or without context variables) + :param space: Parameter space describing input domain + :param f: Objective function to minimize (required) + :param df: Gradient of f (optional; if None, will be approximated) + :param context_manager: If provided, handles fixed context variables + :return: Tuple of (optimized point with context, function value at optimum) """ if context_manager is None: context_manager = ContextManager(space, {}) - # Compute new objective that inputs non context variables but takes into account the values of the context ones. - # It does nothing if no context is passed - problem = OptimizationWithContext(x0=x0, f=f, df=df, f_df=f_df, context_manager=context_manager) + # Build objective functions that handle context + problem = OptimizationWithContext(x0=x0, f=f, df=df, context_manager=context_manager) add_context = lambda x: context_manager.expand_vector(x) - # Optimize point - if f is None: - f_no_context = None - else: - f_no_context = problem.f_no_context - - if df is None: - df_no_context = None - else: - df_no_context = problem.df_no_context - - if f_df is None: - f_df_no_context = None - else: - f_df_no_context = problem.f_df_no_context + # Optimize + optimized_x, _ = optimizer.optimize( + problem.x0_no_context, + problem.f_no_context, + problem.df_no_context + ) - optimized_x, _ = optimizer.optimize(problem.x0_no_context, f_no_context, df_no_context, f_df_no_context) - - # Add context and round according to the type of variables of the design space + # Add context and round according to parameter types suggested_x_with_context = add_context(optimized_x) suggested_x_with_context_rounded = space.round(suggested_x_with_context) - if f is None: - f_opt, _ = f_df(suggested_x_with_context_rounded) - else: - f_opt = f(suggested_x_with_context_rounded) + # Evaluate at final point + f_opt = f(suggested_x_with_context_rounded) + return suggested_x_with_context_rounded, f_opt class OptimizationWithContext(object): + """ + Wraps an objective function to handle fixed context variables during optimization. + """ + def __init__( self, x0: np.ndarray, f: Callable, - df: Callable = None, - f_df: Callable = None, + df: Optional[Callable] = None, context_manager: ContextManager = None, ): """ Constructor of an objective function that takes as input a vector x of the non context variables and returns a value in which the context variables have been fixed. + + :param x0: Initial point + :param f: Objective function + :param df: Gradient of objective function + :param context_manager: Handles fixed context variables """ self.x0 = np.atleast_2d(x0) self.f = f self.df = df - self.f_df = f_df self.context_manager = context_manager - if not context_manager: + # Check if context is actually empty (no variables are fixed) + has_context = context_manager and len(context_manager.context_values) > 0 + + if not has_context: self.x0_no_context = x0 self.f_no_context = self.f - self.df_no_context = self.df - self.f_df_no_context = self.f_df + # When there's no actual context, use finite differences (scipy converges better) + # This preserves the behavior of the original code + self.df_no_context = None else: self.x0_no_context = self.x0[:, self.context_manager.non_context_idxs] - self.f_no_context = self.f_no_context - if self.f_df is None: - self.df_no_context = None - self.f_df_no_context = None + self.f_no_context = self._make_f_no_context() + self.df_no_context = self._make_df_no_context() if df is not None else None + + def _make_f_no_context(self) -> Callable: + """Create wrapper that adds context variables before calling f.""" + def f_no_context(x: np.ndarray) -> np.ndarray: + x = np.atleast_2d(x) + xx = self.context_manager.expand_vector(x) + if x.shape[0] == 1: + return self.f(xx)[0] else: - self.df_no_context = self.df_no_context - self.f_df_no_context = self.f_df_no_context - - def f_no_context(self, x: np.ndarray) -> np.ndarray: - """ - Wrapper of optimization objective function which deals with adding context variables to x - - :param x: Input without context variables - """ - x = np.atleast_2d(x) - xx = self.context_manager.expand_vector(x) - if x.shape[0] == 1: - return self.f(xx)[0] - else: - return self.f(xx) - - def df_no_context(self, x: np.ndarray) -> np.ndarray: - """ - Wrapper of the derivative of optimization objective function which deals with adding context variables to x - - :param x: Input without context variables - """ - x = np.atleast_2d(x) - xx = self.context_manager.expand_vector(x) - _, df_no_context_xx = self.f_df(xx) - df_no_context_xx = df_no_context_xx[:, np.array(self.context_manager.non_context_idxs)] - return df_no_context_xx - - def f_df_no_context(self, x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: - """ - Wrapper of optimization objective function and its derivative which deals with adding context variables to x - - :param x: Input without context variables - """ - x = np.atleast_2d(x) - xx = self.context_manager.expand_vector(x) - f_no_context_xx, df_no_context_xx = self.f_df(xx) - df_no_context_xx = df_no_context_xx[:, np.array(self.context_manager.non_context_idxs)] - return f_no_context_xx, df_no_context_xx + return self.f(xx) + return f_no_context + + def _make_df_no_context(self) -> Callable: + """Create wrapper that extracts gradient for non-context variables.""" + def df_no_context(x: np.ndarray) -> np.ndarray: + x = np.atleast_2d(x) + xx = self.context_manager.expand_vector(x) + df_xx = self.df(xx) + # Handle gradient shape - could be (n_dims, n_samples) or (n_samples, n_dims) + # Extract only non-context dimensions + if df_xx.shape[0] == 1 or len(df_xx.shape) == 1: + # Shape (n_dims,) or (1, n_dims) - transpose handling + return df_xx[:, np.array(self.context_manager.non_context_idxs)] if len(df_xx.shape) > 1 else df_xx[np.array(self.context_manager.non_context_idxs)] + else: + # Shape (n_dims, n_samples) - extract rows for non-context dims + return df_xx[np.array(self.context_manager.non_context_idxs), :] + return df_no_context class OptTrustRegionConstrained(Optimizer): @@ -232,59 +242,62 @@ def __init__(self, bounds: List[Tuple], constraints: List[IConstraint], max_iter self.constraints = _get_scipy_constraints(constraints) def optimize( - self, x0: np.ndarray, f: Callable = None, df: Callable = None, f_df: Callable = None + self, x0: np.ndarray, f: Callable, df: Optional[Callable] = None ) -> Tuple[np.ndarray, np.ndarray]: """ Run Trust region constrained optimization algorithm :param x0: Initial start point - :param f: Function to optimize - :param df: Derivative of function to optimize - :param f_df: Function returning both value of objective and its gradient + :param f: Objective function to minimize (required) + :param df: Derivative of function to optimize (optional) :return: Location of optimum and function value at optimum """ - if (f is None) and (f_df is None): - raise ValueError("Neither f nor f_df are supplied - you must supply an objective function") - - # If f not supplied, make lambda that returns objective only from f_df - if f is None: - f = lambda x: f_df(x)[0] - - if df is None and f_df is not None: - # If df not supplied and f_df is, make lambda that returns gradient only from f_df - df_1d = lambda x: f_df(x)[1][0, :] - elif df is not None: - # If df is supplied, convert the 2d output to 1d - df_1d = lambda x: df(x)[0, :] + # Prepare gradient function + if df is not None: + # Convert 2d output to 1d for scipy, handle 1d scipy input + def df_1d(x): + x_2d = np.atleast_2d(x) + grad = df(x_2d) + # Handle gradient shape - could be (n_dims, n_samples) or (n_samples, n_dims) + if grad.ndim == 1: + return grad # Already 1D + elif grad.shape[0] == 1: + return grad[0] # Extract from (1, n_dims) + elif grad.shape[1] == 1: + return grad[:, 0] # Extract from (n_dims, 1) (transposed) + else: + return grad[0, :] # Assume (n_samples, n_dims), take first else: - # Gradient not supplied - df_1d = None + # Let scipy approximate with finite differences + df_1d = "2-point" + + # Wrap f to handle 1d scipy input + def f_wrapped(x): + x_2d = np.atleast_2d(x) + f_val = f(x_2d) + # Extract scalar value, handling both scalar and array returns (including 0-d) + if np.isscalar(f_val): + return float(f_val) + else: + f_arr = np.asarray(f_val).squeeze() + return float(f_arr) if f_arr.ndim == 0 else float(f_arr.flat[0] if f_arr.size > 0 else 0.0) options = {"maxiter": self.max_iterations} - if df_1d is None: - res = scipy.optimize.minimize( - f, - x0=x0[0, :], - method="trust-constr", - bounds=self.bounds, - jac="2-point", - options=options, - constraints=self.constraints, - hess=scipy.optimize.BFGS(), - ) - else: - res = scipy.optimize.minimize( - f, - x0=x0[0, :], - method="trust-constr", - bounds=self.bounds, - jac=df_1d, - options=options, - constraints=self.constraints, - hess=scipy.optimize.BFGS(), - ) + # Handle both 1D and 2D x0 + x0_1d = np.atleast_1d(x0).flatten() + + res = scipy.optimize.minimize( + f_wrapped, + x0=x0_1d, + method="trust-constr", + bounds=self.bounds, + jac=df_1d, + options=options, + constraints=self.constraints, + hess=scipy.optimize.BFGS(), + ) result_x = np.atleast_2d(res.x) result_fx = np.atleast_2d(res.fun) diff --git a/tests/emukit/core/optimization/test_optimizer.py b/tests/emukit/core/optimization/test_optimizer.py index d7c8dd07..f0206b4a 100644 --- a/tests/emukit/core/optimization/test_optimizer.py +++ b/tests/emukit/core/optimization/test_optimizer.py @@ -43,25 +43,25 @@ def space(): def test_lbfgs_with_gradient_no_context(lbfgs, objective, gradient, space): x0 = np.array([1, 1]) - x, f = apply_optimizer(lbfgs, x0, space, objective, gradient, None, None) + x, f = apply_optimizer(lbfgs, x0, space, objective, gradient) assert np.all(np.isclose(x, np.array([0, 0]))) def test_lbfgs_no_gradient_no_context(lbfgs, objective, space): x0 = np.array([1, 1]) - x, f = apply_optimizer(lbfgs, x0, space, objective, None, None) + x, f = apply_optimizer(lbfgs, x0, space, objective) assert np.all(np.isclose(x, np.array([0, 0]))) def test_lbfgs_with_gradient_and_context(lbfgs_context, objective, gradient, space): context = ContextManager(space, {"x": 0.5}) x0 = np.array([1, 1]) - x, f = apply_optimizer(lbfgs_context, x0, space, objective, gradient, None, context) + x, f = apply_optimizer(lbfgs_context, x0, space, objective, gradient, context) assert np.all(np.isclose(x, np.array([0.5, 0]))) def test_lbfgs_no_gradient_with_context(lbfgs_context, objective, space): context = ContextManager(space, {"x": 0.5}) x0 = np.array([1, 1]) - x, f = apply_optimizer(lbfgs_context, x0, space, objective, None, None, context) + x, f = apply_optimizer(lbfgs_context, x0, space, objective, None, context) assert np.all(np.isclose(x, np.array([0.5, 0]))) diff --git a/tests/emukit/core/optimization/test_trust_region_constrained_optimizer.py b/tests/emukit/core/optimization/test_trust_region_constrained_optimizer.py index 7140ed11..e851dfb9 100644 --- a/tests/emukit/core/optimization/test_trust_region_constrained_optimizer.py +++ b/tests/emukit/core/optimization/test_trust_region_constrained_optimizer.py @@ -43,13 +43,7 @@ def trust_region_constr_nonlinear_constraint(): def test_trust_region_constrained_no_context(trust_region_constr_linear_constraint, objective, space): x0 = np.array([1, 1]) - x, f = apply_optimizer(trust_region_constr_linear_constraint, x0, space, objective, None, None, None) - assert np.all(np.isclose(x, np.array([0, 0.5]))) - - -def test_trust_region_constrained_no_context(trust_region_constr_linear_constraint, objective, space): - x0 = np.array([1, 1]) - x, f = apply_optimizer(trust_region_constr_linear_constraint, x0, space, objective, None, None, None) + x, f = apply_optimizer(trust_region_constr_linear_constraint, x0, space, objective) assert np.all(np.isclose(x, np.array([0, 0.5]))) @@ -58,7 +52,7 @@ def test_trust_region_constrained_no_context_with_gradient( ): # Tests the optimizer when passing in f and df as separate function handles x0 = np.array([1, 1]) - x, f = apply_optimizer(trust_region_constr_linear_constraint, x0, space, objective, gradient, None, None) + x, f = apply_optimizer(trust_region_constr_linear_constraint, x0, space, objective, gradient) assert np.all(np.isclose(x, np.array([0, 0.5]))) @@ -67,17 +61,17 @@ def test_trust_region_constrained_nonlinear_constraint( ): # Tests the optimizer when passing in f and df as separate function handles x0 = np.array([1, 1]) - x, f = apply_optimizer(trust_region_constr_nonlinear_constraint, x0, space, objective, gradient, None, None) + x, f = apply_optimizer(trust_region_constr_nonlinear_constraint, x0, space, objective, gradient) assert np.all(np.isclose(x, np.array([np.sqrt(2) / 2, np.sqrt(2) / 2]), atol=1e-3)) def test_trust_region_constrained_no_context_with_f_df( trust_region_constr_linear_constraint, objective, gradient, space ): - # Tests the optimizer when passing in f and df as a single function handle - f_df = lambda x: (objective(x), gradient(x)) + # Tests the optimizer when passing in f and df as separate handles + # (Note: f_df parameter removed in refactoring, now pass df separately) x0 = np.array([1, 1]) - x, f = apply_optimizer(trust_region_constr_linear_constraint, x0, space, None, None, f_df, None) + x, f = apply_optimizer(trust_region_constr_linear_constraint, x0, space, objective, gradient) assert np.all(np.isclose(x, np.array([0, 0.5]), atol=1e-3))