Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conda-envs/environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies:
- jupyter-sphinx
- myst-nb<=1.0.0
- numpydoc
- preliz>=0.20.0
- pre-commit>=2.8.0
- polyagamma
- pytest-cov>=2.5
Expand Down
1 change: 1 addition & 0 deletions conda-envs/environment-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies:
- myst-nb<=1.0.0
- numpydoc
- polyagamma
- preliz>=0.20.0
- pre-commit>=2.8.0
- pymc-sphinx-theme>=0.16
- sphinx-copybutton
Expand Down
1 change: 1 addition & 0 deletions conda-envs/environment-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies:
- zarr>=2.5.0,<3
# Extra dependencies for testing
- ipython>=7.16
- preliz>=0.20.0
- pre-commit>=2.8.0
- pytest-cov>=2.5
- pytest>=3.0
Expand Down
1 change: 1 addition & 0 deletions conda-envs/windows-environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies:
- myst-nb<=1.0.0
- numpydoc
- polyagamma
- preliz>=0.20.0
- pre-commit>=2.8.0
- pytest-cov>=2.5
- pytest>=3.0
Expand Down
1 change: 1 addition & 0 deletions conda-envs/windows-environment-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies:
- zarr>=2.5.0,<3
# Extra dependencies for testing
- ipython>=7.16
- preliz>=0.20.0
- pre-commit>=2.8.0
- pytest-cov>=2.5
- pytest>=3.0
Expand Down
155 changes: 48 additions & 107 deletions pymc/func_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings

from collections.abc import Callable

import numpy as np
import pytensor.tensor as pt

from pytensor.gradient import NullTypeGradError
from scipy import optimize
import warnings

import pymc as pm

Expand All @@ -30,24 +23,25 @@ def find_constrained_prior(
distribution: pm.Distribution,
lower: float,
upper: float,
init_guess: dict[str, float],
init_guess: dict[str, float] | None = None,
mass: float = 0.95,
fixed_params: dict[str, float] | None = None,
fixed_stat: tuple[str, float] | None = None,
mass_below_lower: float | None = None,
**kwargs,
) -> dict[str, float]:
"""
Find optimal parameters to get `mass` % of probability of a distribution between `lower` and `upper`.
Find the maximum entropy distribution that satisfies the constraints.

Find the maximum entropy distribution with `mass` in the interval
defined by the `lower` and `upper` end-points.

Note: only works for one- and two-parameter distributions, as there
are exactly two constraints. Fix some combination of parameters
if you want to use it on >=3-parameter distributions.
Additional constraints can be set via `fixed_params` and `fixed_stat`.

Parameters
----------
distribution : Distribution
PyMC distribution you want to set a prior on.
Needs to have a ``logcdf`` method implemented in PyMC.
lower : float
Lower bound to get `mass` % of probability of `pm_dist`.
upper : float
Expand All @@ -65,11 +59,10 @@ def find_constrained_prior(
Dictionary of fixed parameters, so that there are only 2 to optimize.
For instance, for a StudentT, you fix nu to a constant and get the optimized
mu and sigma.
mass_below_lower : float, optional, default None
The probability mass below the ``lower`` bound. If ``None``,
defaults to ``(1 - mass) / 2``, which implies that the probability
mass below the ``lower`` value will be equal to the probability
mass above the ``upper`` value.
fixed_stat: tuple
Summary statistic to fix. The first element should be a name and the second a
numerical value. Valid names are: "mean", "mode", "median", "var", "std",
"skewness", "kurtosis". Defaults to None.

Returns
-------
Expand All @@ -78,19 +71,12 @@ def find_constrained_prior(
Dictionary keys are the parameter names and
dictionary values are the optimized parameter values.

Notes
-----
Optional keyword arguments can be passed to ``find_constrained_prior``. These will be
delivered to the underlying call to :external:py:func:`scipy.optimize.minimize`.

Examples
--------
.. code-block:: python

# get parameters obeying constraints
opt_params = pm.find_constrained_prior(
pm.Gamma, lower=0.1, upper=0.4, mass=0.75, init_guess={"alpha": 1, "beta": 10}
)
opt_params = pm.find_constrained_prior(pm.Gamma, lower=0.1, upper=0.4, mass=0.75)

# use these parameters to draw random samples
samples = pm.Gamma.dist(**opt_params, size=100).eval()
Expand All @@ -104,101 +90,56 @@ def find_constrained_prior(
pm.StudentT,
lower=0,
upper=1,
init_guess={"mu": 5, "sigma": 2},
fixed_params={"nu": 7},
)

Under some circumstances, you might not want to have the same cumulative
probability below the ``lower`` threshold and above the ``upper`` threshold.
For example, you might want to constrain an Exponential distribution to
find the parameter that yields 90% of the mass below the ``upper`` bound,
and have zero mass below ``lower``. You can do that with the following call
to ``find_constrained_prior``

.. code-block:: python

opt_params = pm.find_constrained_prior(
pm.Exponential,
lower=0,
upper=3.0,
mass=0.9,
init_guess={"lam": 1},
mass_below_lower=0,
)
"""
warnings.warn(
"find_constrained_prior is deprecated and will be removed in a future version. "
"Please use maxent function from PreliZ. "
"https://preliz.readthedocs.io/en/latest/api_reference.html#preliz.unidimensional.maxent",
FutureWarning,
stacklevel=2,
)

assert 0.01 <= mass <= 0.99, (
"This function optimizes the mass of the given distribution +/- "
f"1%, so `mass` has to be between 0.01 and 0.99. You provided {mass}."
)
if mass_below_lower is None:
mass_below_lower = (1 - mass) / 2

# exit when any parameter is not scalar:
if np.any(np.asarray(distribution.rv_op.ndims_params) != 0):
raise NotImplementedError(
"`pm.find_constrained_prior` does not work with non-scalar parameters yet.\n"
"Feel free to open a pull request on PyMC repo if you really need this feature."
)

dist_params = pt.vector("dist_params")
params_to_optim = {
arg_name: dist_params[i] for arg_name, i in zip(init_guess.keys(), range(len(init_guess)))
}

if fixed_params is not None:
params_to_optim.update(fixed_params)

dist = distribution.dist(**params_to_optim)

try:
logcdf_lower = pm.logcdf(dist, pm.floatX(lower))
logcdf_upper = pm.logcdf(dist, pm.floatX(upper))
except AttributeError:
raise AttributeError(
f"You cannot use `find_constrained_prior` with {distribution} -- it doesn't have a logcdf "
"method yet.\nOpen an issue or, even better, a pull request on PyMC repo if you really "
"need it."
from preliz import distributions
from preliz.unidimensional import maxent
except ImportError as e:
raise ImportError(
"The `find_constrained_prior` function requires the `preliz` package. "
"You can install it via `pip install preliz` or `conda install -c conda-forge preliz`."
) from e

if fixed_params is None:
fixed_params = {}

if init_guess is not None:
warnings.warn(
"The `init_guess` argument is deprecated and will be removed in a future version. "
"The initial guess is determined automatically.",
FutureWarning,
stacklevel=2,
)

target = (pt.exp(logcdf_lower) - mass_below_lower) ** 2
target_fn = pm.pytensorf.compile([dist_params], target, allow_input_downcast=True)

constraint = pt.exp(logcdf_upper) - pt.exp(logcdf_lower)
constraint_fn = pm.pytensorf.compile([dist_params], constraint, allow_input_downcast=True)

jac: str | Callable
constraint_jac: str | Callable
try:
pytensor_jac = pm.gradient(target, [dist_params])
jac = pm.pytensorf.compile([dist_params], pytensor_jac, allow_input_downcast=True)
pytensor_constraint_jac = pm.gradient(constraint, [dist_params])
constraint_jac = pm.pytensorf.compile(
[dist_params], pytensor_constraint_jac, allow_input_downcast=True
if mass_below_lower is not None:
warnings.warn(
"The `mass_below_lower` argument is deprecated and will be removed in a future version. "
"Use `fixed_stat` or `mode` to add additional constraints.",
FutureWarning,
stacklevel=2,
)
# when PyMC cannot compute the gradient
except (NotImplementedError, NullTypeGradError):
jac = "2-point"
constraint_jac = "2-point"
cons = optimize.NonlinearConstraint(constraint_fn, lb=mass, ub=mass, jac=constraint_jac)

opt = optimize.minimize(
target_fn, x0=list(init_guess.values()), jac=jac, constraints=cons, **kwargs
)
if not opt.success:
raise ValueError(
f"Optimization of parameters failed.\nOptimization termination details:\n{opt}"

if kwargs:
warnings.warn(
"Passing additional keyword arguments is deprecated and will be "
"removed in a future version.",
DeprecationWarning,
stacklevel=2,
)

# save optimal parameters
opt_params = dict(zip(init_guess.keys(), opt.x))
if fixed_params is not None:
opt_params.update(fixed_params)
return opt_params
dist = getattr(distributions, distribution.__name__)

return maxent(
dist(**fixed_params), lower=lower, upper=upper, mass=mass, fixed_stat=fixed_stat, plot=False
).params_dict
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ numpydoc
pandas>=0.24.0
polyagamma
pre-commit>=2.8.0
preliz>=0.20.0
pymc-sphinx-theme>=0.16.0
pytensor>=2.32.0,<2.33
pytest-cov>=2.5
Expand Down
103 changes: 12 additions & 91 deletions tests/test_func_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,111 +12,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pytest

import pymc as pm


@pytest.mark.parametrize(
"distribution, lower, upper, init_guess, fixed_params, mass_below_lower",
"distribution, lower, upper, mass, fixed_params, fixed_stats",
[
(pm.Gamma, 0.1, 0.4, {"alpha": 1, "beta": 10}, {}, None),
(pm.Normal, 155, 180, {"mu": 170, "sigma": 3}, {}, None),
(pm.StudentT, 0.1, 0.4, {"mu": 10, "sigma": 3}, {"nu": 7}, None),
(pm.StudentT, 0, 1, {"mu": 5, "sigma": 2, "nu": 7}, {}, None),
(pm.Exponential, 0, 1, {"lam": 1}, {}, 0),
(pm.HalfNormal, 0, 1, {"sigma": 1}, {}, 0),
(pm.Binomial, 0, 8, {"p": 0.5}, {"n": 10}, None),
(pm.Poisson, 1, 15, {"mu": 10}, {}, None),
(pm.Poisson, 19, 41, {"mu": 30}, {}, None),
(pm.Gamma, 0.1, 0.4, None, {}, None),
(pm.StudentT, 0.1, 0.4, 0.7, {"nu": 7}, None),
(pm.Beta, 0, 0.7, 0.9, {}, ("mode", 0.25)),
],
)
@pytest.mark.parametrize("mass", [0.5, 0.75, 0.95])
def test_find_constrained_prior(
distribution, lower, upper, init_guess, fixed_params, mass, mass_below_lower
distribution,
lower,
upper,
mass,
fixed_params,
fixed_stats,
):
opt_params = pm.find_constrained_prior(
pm.find_constrained_prior(
distribution,
lower=lower,
upper=upper,
mass=mass,
init_guess=init_guess,
fixed_params=fixed_params,
mass_below_lower=mass_below_lower,
fixed_stat=fixed_stats,
)

opt_distribution = distribution.dist(**opt_params)
mass_in_interval = (
pm.math.exp(pm.logcdf(opt_distribution, upper))
- pm.math.exp(pm.logcdf(opt_distribution, lower))
).eval()
assert np.abs(mass_in_interval - mass) <= 1e-5


@pytest.mark.parametrize(
"distribution, lower, upper, init_guess, fixed_params",
[
(pm.Gamma, 0.1, 0.4, {"alpha": 1}, {"beta": 10}),
(pm.Exponential, 0.1, 1, {"lam": 1}, {}),
(pm.Binomial, 0, 2, {"p": 0.8}, {"n": 10}),
],
)
def test_find_constrained_prior_error_too_large(
distribution, lower, upper, init_guess, fixed_params
):
with pytest.raises(
ValueError, match="Optimization of parameters failed.\nOptimization termination details:\n"
):
pm.find_constrained_prior(
distribution,
lower=lower,
upper=upper,
mass=0.95,
init_guess=init_guess,
fixed_params=fixed_params,
)


def test_find_constrained_prior_input_errors():
# missing param
with pytest.raises(TypeError, match="required positional argument"):
pm.find_constrained_prior(
pm.StudentT,
lower=0.1,
upper=0.4,
mass=0.95,
init_guess={"mu": 170, "sigma": 3},
)

# mass too high
with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"):
pm.find_constrained_prior(
pm.StudentT,
lower=0.1,
upper=0.4,
mass=0.995,
init_guess={"mu": 170, "sigma": 3},
fixed_params={"nu": 7},
)

# mass too low
with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"):
pm.find_constrained_prior(
pm.StudentT,
lower=0.1,
upper=0.4,
mass=0.005,
init_guess={"mu": 170, "sigma": 3},
fixed_params={"nu": 7},
)

# non-scalar params
with pytest.raises(NotImplementedError, match="does not work with non-scalar parameters yet"):
pm.find_constrained_prior(
pm.MvNormal,
lower=0,
upper=1,
mass=0.95,
init_guess={"mu": 5, "cov": np.asarray([[1, 0.2], [0.2, 1]])},
)
Loading