Commit ca98a60b authored by Jason Rhinelander

Added BayesianLinear classes

parent 6b8ae0e0
@@ -80,6 +80,7 @@ endfunction()
add_pyeris_module(core)
add_pyeris_module(position)
add_pyeris_module(learning)
# We configure setup.py twice: the first during cmake configuration, which handles things like the version,
Subproject commit 8fef1b8fcb3bbd7adc5be83eca726669ff93e96f
Subproject commit 89219c59c471fdbd7aec5c8eeccce0b6b0a311e5
#include "pyeris/common.hpp"
#include "pyeris/learning/BayesianLinear.hpp"
#include "pyeris/learning/BayesianLinearRestricted.hpp"
namespace pyeris {
PYBIND11_PLUGIN(learning) {
py::module m("eris.learning", "eris interface for Python -- agent learning classes");
learning::bind_BL(m);
learning::bind_BLR(m);
return m.ptr();
}
}
#include "pyeris/learning/BayesianLinear.hpp"
#include <pybind11/eigen.h>
#include <pybind11/functional.h> // needed for the std::function arguments of predict_generic
#include <pybind11/stl.h>
using namespace Eigen;
using namespace eris::learning;
namespace pyeris { namespace learning {
void bind_BL(py::module &m) {
py::class_<BayesianLinear, std::unique_ptr<BayesianLinear>, PyBL<>> bl(m, "BayesianLinear");
bl
.def(py::init<unsigned, MatrixXd, VectorXd>(),
"K"_a, "noninf_X"_a = MatrixXd(), "noninf_y"_a = VectorXd(),
"Creates a non-informative belief with optional initial data. The belief will remain non-informative until enough data of sufficient variability has been acquired to form an informative belief distribution")
.def(py::init<VectorXd, double, MatrixXd, double>(),
"beta"_a, "s2"_a, "V_inverse"_a, "n"_a,
"Creates a belief with the given prior parameters")
.def(py::init<const BayesianLinear&, const Ref<const VectorXd>&, const Ref<const MatrixXd>&, double>(),
"prior"_a, "y"_a, "X"_a, "sd_scale"_a = 1.0,
"Constructs a new belief distribution by combining the given prior with the given observations. sd_scale allows adjustment of the prior's variance to weakning (> 1) or strengthen (< 1) the prior relative to the new observations (the variance is multiplied by the square of `sd_scale`)")
.def(py::init<const BayesianLinear&, double>(),
"prior"_a, "sd_scale"_a,
"Constructs a new belief distribution by weakening (sd_scale > 1) or strengthening (sd_scale < 1) the variance of the belief")
.def("fixed_model_size", &BayesianLinear::fixedModelSize, "This method can be overridden by a subclass to return a non-zero number of parameters to indicate that the class supports only the given number of parameters; this will be checked during class construction. If it returns 0 (the default), the number of parameters is determined from the constructor parameters")
.def_property_readonly("noninformative", &BayesianLinear::noninformative, "Returns True if this model is noninformative (because it has not yet had sufficient data incorporated from an initial noninformative prior)")
.def_property_readonly("beta", &BayesianLinear::beta, "Accesses the posterior distribution's beta parameter")
.def_property_readonly("s2", &BayesianLinear::s2, "Accesses the posterior distribution's s^2 parameter")
.def_property_readonly("n", &BayesianLinear::n, "Accesses the posterior distribution's n parameter")
.def_property_readonly("Vinv", &BayesianLinear::Vinv, "Accesses the posterior distribution's V parameter, stored and returned as an inverse")
.def_property_readonly("Vinvinv", &BayesianLinear::Vinvinv, "Calculates and returns the posterior distribution's V parameter. This is calculated from the stored inverse the first time it is accessed")
.def_property_readonly("K", &BayesianLinear::K, "Returns K, the number of parameters of this model's linear equation")
.def("predict", &BayesianLinear::predict, "X"_a, "draws"_a = 0, "Takes one or more rows of X data and predicts the associated means by averaging the results of many draws from the posterior. Draws are reused across predictions, both for X rows and multiple .predict() calls. If the requested number of draws is given, that many draws will be used; if 0, all previously cached draws are used, or 1000 if there are no previous draws.")
.def("predict_variance", &BayesianLinear::predictVariance, "X"_a, "draws"_a = 0, "Takes one or more rows of X data and predicts both a mean and variance of predicted values by averaging over the results of many draws from the posterior. Draws are reused across predictions, both for X rows and multiple .predict() calls. If the requested number of draws is given, that many draws will be used; if 0, all previously cached draws are used, or 1000 if there are no previous draws. Returns a matrix of the same number of rows as X: the first column is set to prediction mean, the second to the prediction variance")
.def("predict_generic", (MatrixXd (BayesianLinear::*)(const Eigen::Ref<const Eigen::MatrixXd>&, const std::vector<std::function<double(double y)>>&, unsigned int)) &BayesianLinear::predictGeneric,
"X"_a, "functions"_a, "draws"_a = 0,
"Like predict and predict_variance, but obtains means of arbitrary functions of the predictions across multiple draws. The mean of each function in `functions` is stored as a column in the returned data. For example, .predict is equivalent to .predict_generic(X, [lambda x: x]")
.def("predict_generic", (MatrixXd (BayesianLinear::*)(const Eigen::Ref<const Eigen::MatrixXd>&, const std::function<double(double)>&, unsigned int)) &BayesianLinear::predictGeneric,
"X"_a, "function"_a, "draws"_a = 0,
"Single-function version of predict_generic: this takes just one function instead of a list of functions")
.def("discard", &BayesianLinear::discard, "Discards any previously obtained prediction draws. The next prediction will take new draws from the posterior distribution")
.def("draw", &BayesianLinear::draw, "Draws a set of beta and s^2 values according to the posterior and returns them in a column vector; elements 0 through K-1 are the drawn beta values, element K is the s^2 value. This method is used for prediction and can be overridden to affect the distribution of drawn values")
.def_property_readonly("last_draw", &BayesianLinear::lastDraw, "Accesses the last draw returned by draw(). The first K elements are the drawn beta values; the K+1th element is the drawn s^2 value")
.def_property_readonly("noninf_X_data", &BayesianLinear::noninfXData, "Returns the X data that has yet to be incorporated into this noninformative model. Raises an error if `.noninformative` is False")
.def_property_readonly("noninf_y_data", &BayesianLinear::noninfXData, "Returns the y data that has yet to be incorporated into this noninformative model. Raises an error if `.noninformative` is False")
.def("__str__", &BayesianLinear::operator std::string, "Returns a string representation of this belief")
.def("display_name", &BayesianLinear::display_name, "Returns the display name of this model as used in __str__. The default is 'BayesianLinear'.")
.def_property("names", (const std::vector<std::string>& (BayesianLinear::*)() const) &BayesianLinear::names, (void (BayesianLinear::*)(const std::vector<std::string>&)) &BayesianLinear::names,
"Accesses the list of parameter names for this model. The default names are simply numbers from 0 to K-1")
;
py::register_exception<BayesianLinear::draw_failure>(m, "DrawFailure");
}
}}
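As a rough illustration of the prediction interface bound above, the Python side looks like this (a minimal sketch with made-up parameter values; it assumes an informative belief):

import numpy as np
from eris.learning import BayesianLinear

# Sketch only: an informative K = 2 belief with illustrative parameters.
m = BayesianLinear(beta=np.array([1.0, -0.5]), s2=2.0, V_inverse=np.eye(2), n=10.0)
Xnew = np.array([[1.0, 0.0], [1.0, 1.0]])
means = m.predict(Xnew, 1000)      # posterior predictive means from 1000 draws
mv = m.predict_variance(Xnew)      # column 0: mean; column 1: variance
ey = m.predict_generic(Xnew, [lambda x: x, lambda x: x**2])  # E[y] and E[y²]
m.discard()                        # force fresh posterior draws next time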
#pragma once
#include <eris/learning/BayesianLinear.hpp>
#include "pyeris/common.hpp"
using namespace Eigen;
using namespace eris::learning;
namespace pyeris { namespace learning {
template <class Base = BayesianLinear>
class PyBL : public Base {
using Base::Base;
// Virtual methods:
virtual unsigned int fixedModelSize() const override { PYBIND11_OVERLOAD_NAME(unsigned int, Base, "fixed_model_size", fixedModelSize); }
operator std::string() const override { PYBIND11_OVERLOAD_NAME(std::string, Base, "__str__", operator std::string); }
std::string display_name() const override { PYBIND11_OVERLOAD(std::string, Base, display_name); }
// Also virtual, but if overloaded we need to call the overload, then store the returned value
// in last_draw_.
const VectorXd& draw() override {
if (_overload_draw("draw")) return last_draw_;
return Base::draw();
}
MatrixXd predictGeneric(const Ref<const MatrixXd> &X, const std::vector<std::function<double(double y)>> &g, unsigned int draws = 0) override {
PYBIND11_OVERLOAD_NAME(MatrixXd, Base, "predict_generic", predictGeneric, X, g, draws);
}
protected:
using Base::last_draw_;
using Base::K_;
// Helper method for helping with draw overloads that need to set last_draw_ to the returned
// value if called via pybind overload.
template <typename... Args>
bool _overload_draw(const char *method, Args &&... args) {
py::gil_scoped_acquire gil;
py::function overload = py::get_overload(this, method);
if (overload) {
if (last_draw_.size() != K_ + 1) last_draw_.resize(K_ + 1);
last_draw_ = overload(std::forward<Args>(args)...).template cast<VectorXd>();
if (last_draw_.size() != K_ + 1) throw std::runtime_error("Error: draw return did not have K+1 elements!");
return true;
}
return false;
}
};
void bind_BL(py::module &m);
}}
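The PyBL trampoline above exists so that Python subclasses can override the virtual hooks; _overload_draw routes a Python draw() back into last_draw_. A sketch of what that enables (the subclass and its degenerate sampling rule are made up):

import numpy as np
from eris.learning import BayesianLinear

class PointMassBL(BayesianLinear):
    # Hypothetical subclass: always "draws" the current posterior parameters.
    def draw(self):
        # Must return K+1 elements (K betas followed by s²); PyBL checks this.
        return np.append(np.asarray(self.beta).ravel(), self.s2)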
#include "pyeris/learning/BayesianLinearRestricted.hpp"
#include <pybind11/operators.h>
#include <pybind11/eigen.h>
namespace pyeris { namespace learning {
void bind_BLR(py::module &m) {
using BLR = BayesianLinearRestricted;
py::class_<BLR, std::unique_ptr<BLR>, PyBLR<>> blr(m, "BayesianLinearRestricted", py::base<BayesianLinear>());
// Don't bother with RestrictionProxy: RestrictionIneqProxy can do everything it does.
py::class_<BLR::RestrictionIneqProxy> restr_ineq(m, "RestrictionIneqProxy");
py::enum_<BLR::DrawMode>(m, "DrawMode")
.value("Auto", BLR::DrawMode::Auto)
.value("Rejection", BLR::DrawMode::Rejection)
.value("Gibbs", BLR::DrawMode::Gibbs)
;
py::register_exception<BLR::constraint_failure>(m, "ConstraintFailure");
blr
.def(py::init<unsigned, MatrixXd, VectorXd>(),
"K"_a, "noninf_X"_a = MatrixXd(), "noninf_y"_a = VectorXd(),
"Creates a non-informative belief with optional initial data. The belief will remain non-informative until enough data of sufficient variability has been acquired to form an informative belief distribution")
.def(py::init<VectorXd, double, MatrixXd, double>(),
"beta"_a, "s2"_a, "V_inverse"_a, "n"_a,
"Creates a belief with the given prior parameters")
.def(py::init<const BayesianLinear&, const Ref<const VectorXd>&, const Ref<const MatrixXd>&, double>(),
"prior"_a, "y"_a, "X"_a, "sd_scale"_a = 1.0,
"Constructs a new belief distribution by combining the given prior with the given observations. sd_scale allows adjustment of the prior's variance to weakning (> 1) or strengthen (< 1) the prior relative to the new observations (the variance is multiplied by the square of `sd_scale`)")
.def(py::init<const BayesianLinear&, double>(),
"prior"_a, "sd_scale"_a,
"Constructs a new belief distribution by weakening (sd_scale > 1) or strengthening (sd_scale < 1) the variance of the belief")
.def("restrict", (BLR::RestrictionIneqProxy (BLR::*)(size_t)) &BLR::restrict, "k"_a, py::keep_alive<0, 1>(), "Returns an object used to set or access single-parameter restrictions on parameter `k`. See RestrictionIneqProxy for details")
.def("add_restriction", &BLR::addRestriction, "R"_a, "r"_a, "Adds a single linear restriction of the form `R * beta <= r`")
.def("add_restrictions", &BLR::addRestrictions, "R"_a, "r"_a, "Adds a set of linear restrictions of the form `R * beta <= r`, where the ith row in R corresponds to the ith value of r")
.def("add_restriction_ge", &BLR::addRestrictionGE, "R"_a, "r"_a, "Adds a single linear restriction of the form `R * beta >= r`. This is identical to calling `add_restriction(-R, -r)`")
.def("add_restrictions_ge", &BLR::addRestrictionsGE, "R"_a, "r"_a, "Adds a set of linear restrictions of the form `R * beta >= r`, where the ith row in R corresponds to the ith value of r. This is simply a shortcut for calling `add_restrictions(-R, -r)`")
.def_property_readonly("restrictions", &BLR::numRestrictions, "Returns the number of linear restrictions currently applied to the model.")
.def("remove_restriction", &BLR::removeRestriction, "r"_a, "Removes the `r`th restriction from the model.")
.def("clear_restrictions", &BLR::clearRestrictions, "Removes all restrictions from the model.")
.def("draw", &BLR::draw, "Draws a vector of beta and s^2. This will either use rejection sampling, gibbs sampling, or first rejection sampling with fallback to Gibbs sampling, depending on the value of `draw_mode`")
.def_readwrite("draw_mode", &BLR::draw_mode, "Set to one of DrawMode.Auto, DrawMode.Rejection, or DrawMode.Gibbs. Auto mode tries rejection sampling and falls back to gibbs sampling if the acceptance rate is too low")
.def_readonly("last_draw_mode", &BLR::last_draw_mode, "Set to DrawMode.Rejection or DrawMode.Gibbs to indicate the last draw mode that was used")
.def("draw_gibbs", &BLR::drawGibbs, "Draws a vector of beta and s^2 using Gibbs sampling instead of rejection sampling. This method is typically indirectly by a call to draw()")
.def("gibbs_initialize", &BLR::gibbsInitialize, "initial_beta"_a, "max_tries"_a = 100, "Initializes the Gibbs sampler by starting at the given point and making adjustments, if necessary, to attempt to obtain a point that does not violate any restrictions. `max_tries` is the number of adjustments that will be attmped before giving up by raising a ConstraintFailure exception")
.def("draw_rejection", &BLR::drawRejection, "max_discards"_a = -1, "Attempts to obtain a draw using rejection sampling: repeatedly drawing until the drawn value does not violate any constraints. `max_discards` is the maximum number of discarded draws to tolerate before raising an exception; if -1 (the default), the maximum draws comes from `draw_rejection_max_discards`")
.def_property_readonly("R", &BLR::R, "Obtains a copy of the restriction coefficient selection matrix `R` such that `obj.R * beta <= obj.r` represents the linear restrictions of the model")
.def_property_readonly("r", &BLR::r, "Obtains a copy of the restriction values vector `r` such that `obj.R * beta <= obj.r` represents the linear restrictions of the model")
.def_readwrite("draw_rejection_max_discards", &BLR::draw_rejection_max_discards, "The maximum number of inadmissable draws for a single rejection draw before aborting. Defaults to 50. When draw_rejection's `max_discards` parameter overrides this value.")
.def_readwrite("draw_auto_min_success_rate", &BLR::draw_auto_min_success_rate, "The minimum draw success rate below which draw() switches to Gibbs sampling (when in Auto mode). Defaults to 0.2. At least `draw_auto_min_rejection` rejection attempts must have been made before this rate applies.")
.def_readwrite("draw_auto_min_rejection", &BLR::draw_auto_min_rejection, "The minimum number of Auto-mode rejection attempts before considering the success rate. Defaults to 50.")
.def_readwrite("draw_gibbs_burnin", &BLR::draw_gibbs_burnin, "The number of burn-in draws for the first Gibbs sampler draw. Defaults to 100.")
.def_readwrite("draw_gibbs_thinning", &BLR::draw_gibbs_thinning, "draw_gibbs() uses every `draw_gibbs_thinning`th sample (1 = use every draw). Defaults to 2 (using every other draw).")
.def_readonly("draw_rejection_discards_last", &BLR::draw_rejection_discards_last, "Diagnostic: tracks the number of inadmissable draws by the most recent call to draw_rejection()")
.def_readonly("draw_rejection_success", &BLR::draw_rejection_success, "Diagnostic: the cumulative number of successful rejection draws")
.def_readonly("draw_rejection_discards", &BLR::draw_rejection_discards, "Diagnostic: the cumulative number of inadmissable rejection draws")
.def_readonly("draw_gibbs_success", &BLR::draw_gibbs_success, "Diagnostic: the cumulative number of gibbs draws obtained, not including burn-in and thinning draws")
.def_readonly("draw_gibbs_discards", &BLR::draw_gibbs_discards, "Diagnostic: the cumulative number of discarded (for burn-in or thinning) gibbs draws")
;
restr_ineq
.def("has_upper_bound", &BLR::RestrictionIneqProxy::hasUpperBound, "Returns true if an upper bound restriction has been applied to the referenced parameter. Multi-parameter restrictions are not considered.")
.def("upper_bound", &BLR::RestrictionIneqProxy::upperBound, "Returns the most-binding upper bound restriction that has been applied to the referenced parameter. Multi-parameter restrictions are not considered. Returns NaN if the parameter has no single-parameter upper bound restriction")
.def("has_lower_bound", &BLR::RestrictionIneqProxy::hasUpperBound, "Returns true if a lower bound restriction has been applied to the referenced parameter. Multi-parameter restrictions are not considered.")
.def("lower_bound", &BLR::RestrictionIneqProxy::lowerBound, "Returns the most-binding lower bound restriction that has been applied to the referenced parameter. Multi-parameter restrictions are not considered. Returns NaN if the parameter has no single-parameter lower bound restriction")
.def("__le__", [](BLR::RestrictionIneqProxy &self, double upper) { self.operator<=(upper); }, "Adds an upper bound restriction to the referenced parameter. For example, to restrict parameter 2 to negative values use `obj.restrict(2) <= 0`.")
.def("__ge__", [](BLR::RestrictionIneqProxy &self, double lower) { self.operator>=(lower); }, "Adds a lower bound restriction to the referenced parameter. For example, to restrict parameter 2 to positive values use `obj.restrict(2) >= 0`.")
;
}
}}
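From Python, the restriction interface bound above composes like this (an illustrative sketch; the bounds and restriction values are arbitrary):

import numpy as np
from eris.learning import BayesianLinearRestricted, DrawMode

m = BayesianLinearRestricted(3)     # noninformative belief with K = 3
m.restrict(0) <= 0.0                # single-parameter bound: beta_0 <= 0
m.restrict(2) >= 1.0                # single-parameter bound: beta_2 >= 1
m.add_restriction(np.array([1.0, -1.0, 0.0]), 0.0)  # beta_0 - beta_1 <= 0
m.draw_mode = DrawMode.Auto         # rejection first, Gibbs fallback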
#pragma once
#include <eris/learning/BayesianLinearRestricted.hpp>
#include "pyeris/common.hpp"
#include "pyeris/learning/BayesianLinear.hpp"
namespace pyeris { namespace learning {
template <class Base = BayesianLinearRestricted>
class PyBLR : public PyBL<Base> {
public:
using PyBL<Base>::PyBL;
using PyBL<Base>::last_draw_;
using PyBL<Base>::_overload_draw;
const VectorXd& drawGibbs() override {
if (_overload_draw("draw_gibbs")) return last_draw_;
return Base::drawGibbs();
}
const VectorXd& drawRejection(long max_discards = -1) override {
if (_overload_draw("draw_rejection", max_discards)) return last_draw_;
return Base::drawRejection(max_discards);
}
};
void bind_BLR(py::module &m);
}}
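PyBLR reuses _overload_draw so the restricted samplers can also be replaced from Python; a compact sketch (the subclass and its trivial rule are made up, not a usable sampler):

import numpy as np
from eris.learning import BayesianLinearRestricted

class TrivialBLR(BayesianLinearRestricted):
    # Hypothetical subclass overriding the Gibbs sampler entry point.
    def draw_gibbs(self):
        # Any K+1-element vector (K betas plus s²) satisfies the size check.
        return np.append(np.zeros(self.K), 1.0)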
from eris.learning import BayesianLinear
import numpy as np
from random import normalvariate
from numpy.linalg import lstsq, norm
def print_model(name, model):
    print("%s:" % name)
    print(" β:", model.beta.transpose())
    print(" n:", model.n)
    print(" s²:", model.s2)
    print(" V⁻¹:", model.Vinv)

def print_OLS(name, beta, X, y):
    print("%s:" % name)
    print(" β̂:", beta.transpose())
    print(" σ̂²:", norm(y - X.dot(beta)) ** 2 / X.shape[0])
    print(" X'X:", X.transpose().dot(X))
foo = BayesianLinear(3)
beta = np.array([[-1], [4], [0.5]])
X = np.empty([100,3])
u = np.empty([100,1])
for t in range(100):
    for k in range(3):
        X[t][k] = normalvariate(0, 1)
    u[t] = normalvariate(0, 2.5)
y = X.dot(beta) + u
betahat = lstsq(X, y)[0]
print_OLS("OLS", betahat, X, y);
foo_100_oneshot = BayesianLinear(foo, y, X);
foo_100_twoshot = BayesianLinear(foo, y[0:50], X[0:50,:])
foo_100_twoshot = BayesianLinear(foo_100_twoshot, y[50:100], X[50:100,:])
foo_100_fiveshot = foo
for i in range(0, 100, 20):
    foo_100_fiveshot = BayesianLinear(foo_100_fiveshot, y[i:(i+20)], X[i:(i+20),:])
foo_100_tenshot = foo
for i in range(0, 100, 10):
    foo_100_tenshot = BayesianLinear(foo_100_tenshot, y[i:(i+10)], X[i:(i+10),:])
foo_100_hundredshot = foo
for i in range(0, 100, 1):
    foo_100_hundredshot = BayesianLinear(foo_100_hundredshot, y[i], X[[i],:])
print_model("all-at-once", foo_100_oneshot)
print_model("halfs", foo_100_twoshot)
print_model("by 20s", foo_100_fiveshot)
print_model("by 10s", foo_100_tenshot)
print_model("by 1s", foo_100_hundredshot)
foo_100_weakened_twoshot = BayesianLinear(foo, y[0:50], X[0:50,:])
foo_100_weakened_twoshot = BayesianLinear(foo_100_weakened_twoshot, y[50:100], X[50:100,:], 2.0)
Xw = np.append(X[0:50,:] * 0.5, X[50:100,:], 0)
yw = np.append(y[0:50] * 0.5, y[50:100], 0)
foo_100_weakened_direct = BayesianLinear(foo, yw, Xw)
print_model("weighted direct", foo_100_weakened_direct)
print_model("weakened half", foo_100_weakened_twoshot)
betahat_weakhalf = lstsq(Xw, yw)[0]
print_OLS("OLS, weighted half", betahat_weakhalf, Xw, yw)