# Source code for gamspy.formulations.nn.linear

from __future__ import annotations

import math
from typing import TYPE_CHECKING

import gamspy as gp
import gamspy.formulations.utils as utils
from gamspy.exceptions import ValidationError
from gamspy.formulations.result import FormulationResult
from gamspy.math import dim

if TYPE_CHECKING:
    import numpy as np

    from gamspy import Parameter, Variable


class Linear:
    """
    Formulation generator for Linear layer in GAMS.

    Parameters
    ----------
    container : Container
        Container that will contain the new variable and equations.
    in_features : int
        Input feature size
    out_features : int
        Output feature size
    bias : bool = True
        Should bias be added after linear transformation, by Default: True
    name_prefix : str | None
        Prefix for generated GAMSPy symbols, by default None which means
        random prefix. Using the same name_prefix in different formulations
        causes name conflicts. Do not use the same name_prefix again.

    Examples
    --------
    >>> import gamspy as gp
    >>> import numpy as np
    >>> from gamspy.math import dim
    >>> m = gp.Container()
    >>> l1 = gp.formulations.Linear(m, 128, 64)
    >>> w = np.random.rand(64, 128)
    >>> b = np.random.rand(64)
    >>> l1.load_weights(w, b)
    >>> x = gp.Variable(m, "x", domain=dim([10, 128]))
    >>> y, set_y = l1(x)
    >>> [d.name for d in y.domain]
    ['DenseDim10_1', 'DenseDim64_1']

    """

    def __init__(
        self,
        container: gp.Container,
        in_features: int,
        out_features: int,
        bias: bool = True,
        name_prefix: str | None = None,
    ):
        if not isinstance(in_features, int) or in_features <= 0:
            raise ValidationError("in_features must be a positive integer")

        if not isinstance(out_features, int) or out_features <= 0:
            raise ValidationError("out_features must be a positive integer")

        if not isinstance(bias, bool):
            raise ValidationError("bias must be a boolean")

        self.container = container
        self.in_features = in_features
        self.out_features = out_features
        self.use_bias = bias

        # Layer mode:
        #   0 -> neither load_weights nor make_variable has been called yet
        #   1 -> load_weights was called (weight/bias are parameters)
        #   2 -> make_variable was called (weight/bias are variables)
        self._state = 0

        # GAMSPy symbols holding the weight/bias, plus the raw NumPy arrays
        # passed to load_weights (the arrays are reused for bound propagation).
        self.weight: Parameter | Variable | None = None
        self.weight_array = None
        self.bias: Parameter | Variable | None = None
        self.bias_array = None

        if name_prefix is None:
            name_prefix = gp.utils._get_unique_name()

        self._name_prefix = name_prefix

    def load_weights(
        self, weight: np.ndarray, bias: np.ndarray | None = None
    ) -> None:
        """
        Mark Linear as parameter and load weights from NumPy arrays.
        After this is called `make_variable` cannot be called. Use this
        when you already have the weights of your Linear layer.

        Calling it again replaces the records of the existing parameters.

        Parameters
        ----------
        weight : np.ndarray
            Linear layer weights in shape (out_features x in_features)
        bias : np.ndarray | None
            Linear layer bias in shape (out_features, ), only required when
            bias=True during initialization

        Raises
        ------
        ValidationError
            If `make_variable` was already called, if `bias` is given/omitted
            inconsistently with the `bias` flag used at initialization, or if
            `weight`/`bias` do not have the expected shapes.

        """
        if self._state == 2:
            raise ValidationError(
                "load_weights cannot be used after calling make_variable"
            )

        if self.use_bias is False and bias is not None:
            raise ValidationError(
                "bias must be None since bias was set to False during initialization"
            )

        if self.use_bias is True and bias is None:
            raise ValidationError("bias must be provided")

        if len(weight.shape) != 2:
            raise ValidationError(
                f"expected 2D input for weight (got {len(weight.shape)}D input)"
            )

        expected_shape = (
            self.out_features,
            self.in_features,
        )
        if weight.shape != expected_shape:
            raise ValidationError(f"weight expected to be in shape {expected_shape}")

        if bias is not None:
            if len(bias.shape) != 1:
                raise ValidationError(
                    f"expected 1D input for bias (got {len(bias.shape)}D input)"
                )

            if bias.shape[0] != self.out_features:
                raise ValidationError(
                    f"bias expected to be in shape ({self.out_features},)"
                )

        # All validation is done before any symbol is created or updated, so
        # a rejected call leaves the layer untouched.
        if self.weight is None:
            self.weight = gp.Parameter(
                self.container,
                name=utils._generate_name("p", self._name_prefix, "weight"),
                domain=dim(expected_shape),
                records=weight,
            )
        else:
            self.weight.setRecords(weight)

        self.weight_array = weight

        if self.use_bias:
            if self.bias is None:
                self.bias = gp.Parameter(
                    self.container,
                    name=utils._generate_name("p", self._name_prefix, "bias"),
                    domain=dim([self.out_features]),
                    records=bias,
                )
            else:
                self.bias.setRecords(bias)

            self.bias_array = bias

        self._state = 1

    def make_variable(self, *, init_weights=False) -> None:
        """
        Mark Linear layer as variable. After this is called `load_weights`
        cannot be called. Use this when you need to learn the weights
        of your linear layer in your optimization model.

        Parameters
        ----------
        init_weights : Optional[bool]
            False by default. Whether to initialize weights. It is suggested
            you set this to True unless you want to initialize weights
            yourself. When `init_weights` is set to True, values are
            initialized from
            :math:`\\mathcal{U}(-\\sqrt{k},\\sqrt{k})`,
            where :math:`k = 1/in\\_features`.

        Raises
        ------
        ValidationError
            If `load_weights` was already called.

        """
        if self._state == 1:
            raise ValidationError(
                "make_variable cannot be used after calling load_weights"
            )

        expected_shape = (
            self.out_features,
            self.in_features,
        )

        # Half-width of the uniform initialization interval.
        sk = math.sqrt(1 / self.in_features)

        # Symbols are only created (and initialized) the first time; repeated
        # calls keep the existing variables as they are.
        if self.weight is None:
            self.weight = gp.Variable(
                self.container,
                name=utils._generate_name("v", self._name_prefix, "weight"),
                domain=dim(expected_shape),
            )
            if init_weights:
                self.weight.l[...] = gp.math.uniform(-sk, sk)

        if self.use_bias and self.bias is None:
            self.bias = gp.Variable(
                self.container,
                name=utils._generate_name("v", self._name_prefix, "bias"),
                domain=dim([self.out_features]),
            )
            if init_weights:
                self.bias.l[...] = gp.math.uniform(-sk, sk)

        self._state = 2

    def __call__(
        self, input: gp.Parameter | gp.Variable, propagate_bounds: bool = True
    ) -> FormulationResult:
        """
        Forward pass your input, generate output and equations required for
        calculating the linear transformation. If `propagate_bounds` is True,
        the `input` is of type variable, and `load_weights` was called, then
        the bounds of the input are propagated to the output.

        Returns `FormulationResult` which can be unpacked as an output
        variable and list of equations.

        FormulationResult:
            - equations_created: ["set_output"]
            - variables_created: ["output", "weight", "bias"]
            - parameters_created: ["weight", "bias", "input_bounds", "output_bounds"]

        Note:
            - For backward compatibility, this result object can be unpacked
              as a tuple: `output, equations = linear(input)`.
            - `weight` and `bias` are available as variables if
              `make_variable` was called.
            - `weight` and `bias` are available as parameters if
              `load_weights` was called.
            - `input_bounds` and `output_bounds` are available as parameters
              if `propagate_bounds=True`.

        Parameters
        ----------
        input : gp.Parameter | gp.Variable
            input to the linear layer, must be in shape (* x in_features)
        propagate_bounds : bool = True
            If True, propagate bounds of the input to the output.
            Otherwise, the output variable is unbounded.

        Returns
        -------
        FormulationResult

        Raises
        ------
        ValidationError
            If `propagate_bounds` is not a boolean, if neither `load_weights`
            nor `make_variable` was called, or if `input` has no dimensions
            or its last dimension does not match `in_features`.

        """
        if not isinstance(propagate_bounds, bool):
            raise ValidationError("propagate_bounds should be a boolean.")

        if self.weight is None:
            raise ValidationError(
                "You must call load_weights or make_variable first before using the Linear"
            )

        if len(input.domain) == 0:
            raise ValidationError("expected an input with at least 1 dimension")

        if len(input.domain[-1]) != self.in_features:
            raise ValidationError("in_features does not match")

        # y = x W^T (+ b), with the bias indexed over the output feature domain.
        expr = input @ self.weight.t()

        if self.bias is not None:
            expr = expr + self.bias[expr.domain[-1]]

        out = gp.Variable(
            self.container,
            name=utils._generate_name("v", self._name_prefix, "output"),
            domain=expr.domain,
        )

        set_out = gp.Equation(
            self.container,
            name=utils._generate_name("e", self._name_prefix, "set_output"),
            domain=out.domain,
        )

        set_out[...] = out == expr

        result = FormulationResult(
            result=out,
            equations_created={"set_output": set_out},
        )
        result.variables_created["output"] = out

        if isinstance(self.weight, gp.Variable):
            result.variables_created["weight"] = self.weight
        else:
            result.parameters_created["weight"] = self.weight

        if self.bias is not None:
            if isinstance(self.bias, gp.Variable):
                result.variables_created["bias"] = self.bias
            else:
                result.parameters_created["bias"] = self.bias

        # Bounds can only be propagated when the weights are known numbers
        # (load_weights was called) and the input actually carries bounds
        # (it is a variable).
        if propagate_bounds and self._state == 1 and isinstance(input, gp.Variable):
            self._propagate_bounds(input, out, result)

        return result

    def _propagate_bounds(
        self,
        x: gp.Variable,
        out: gp.Variable,
        result: FormulationResult,
    ) -> None:
        """
        Derive bounds for `out` from the bounds of `x` and the loaded weights.

        Creates the `input_bounds` and `output_bounds` parameters, registers
        them in `result.parameters_created` and sets `out.lo` / `out.up`.
        """
        # Imported locally: numpy is only imported for type checking at
        # module level.
        import numpy as np

        x_bounds = gp.Parameter(
            self.container,
            name=utils._generate_name("p", self._name_prefix, "input_bounds"),
            domain=dim([2, *x.shape]),
        )
        # Leading index "0" holds the lower bounds, "1" the upper bounds.
        x_bounds[("0",) + tuple(x.domain)] = x.lo[...]
        x_bounds[("1",) + tuple(x.domain)] = x.up[...]

        result.parameters_created["input_bounds"] = x_bounds

        # If the bounds are all zeros (None in GAMSPy parameters);
        # we skip matrix multiplication as it will result in zero values
        if x_bounds.records is None:
            out_bounds_array = np.zeros(out.shape)

            if self.use_bias:
                out_bounds_array = out_bounds_array + self.bias_array

            out_bounds = gp.Parameter(
                self.container,
                name=utils._generate_name("p", self._name_prefix, "output_bounds"),
                domain=dim(out.shape),
                records=out_bounds_array,
            )
            # Lower and upper bound coincide in this case.
            out.lo[...] = out_bounds
            out.up[...] = out_bounds

            result.parameters_created["output_bounds"] = out_bounds
            return

        x_lb, x_ub = x_bounds.toDense()

        # To deal with infinity values in the input bounds, we convert them
        # into complex numbers where if the value is -inf, we convert it to
        # 0 - 1j and if the value is inf, we convert it to 0 + 1j.
        # This keeps the matrix products finite: a zero weight multiplied by
        # the imaginary unit is 0, whereas 0 * inf would be nan.
        x_lb = np.where(x_lb == -np.inf, 0 - 1j, x_lb)
        x_ub = np.where(x_ub == np.inf, 0 + 1j, x_ub)

        # get the positive and negative weights separately
        w_pos = np.maximum(self.weight_array, 0)
        w_neg = np.minimum(self.weight_array, 0)

        # Positive weights map lower->lower and upper->upper; negative
        # weights swap the roles of the two bounds.
        lo_out = (x_lb @ w_pos.T) + (x_ub @ w_neg.T)
        up_out = (x_ub @ w_pos.T) + (x_lb @ w_neg.T)

        lo_out = self._decode_complex_bounds(lo_out)
        up_out = self._decode_complex_bounds(up_out)

        if self.use_bias:
            lo_out = lo_out + self.bias_array
            up_out = up_out + self.bias_array

        out_bounds_array = np.stack([lo_out, up_out], axis=0)

        out_bounds = gp.Parameter(
            self.container,
            name=utils._generate_name("p", self._name_prefix, "output_bounds"),
            domain=dim([2, *out.shape]),
            records=out_bounds_array,
        )
        out.lo[...] = out_bounds[("0",) + tuple(out.domain)]
        out.up[...] = out_bounds[("1",) + tuple(out.domain)]

        result.parameters_created["output_bounds"] = out_bounds

    @staticmethod
    def _decode_complex_bounds(values):
        """
        Decode complex-encoded bounds back to real numbers, element-wise.

        5 + 0j -> 5
        3 + 1j -> inf
        7 - 3j -> -inf

        Vectorized replacement for a per-element Python callback wrapped in
        `np.vectorize`; it returns a float array with the shape of `values`.
        """
        import numpy as np

        z = np.asarray(values)
        # A non-zero imaginary part marks an infinite bound; its sign picks
        # the direction. A zero imaginary part means the real part is exact.
        infinite = np.where(z.imag > 0, np.inf, -np.inf)
        return np.where(z.imag == 0, z.real, infinite)