Commit 1ca6a8a0 authored by Jason Rhinelander

BayesianLinear prediction fixes and enhancements

BayesianLinear prediction was badly broken: the distribution of
predicted values was wrong, and error variances weren't being
incorporated at all.  This fixes both problems.

Since this is a pretty major fix, bumping the eris version.

This also changes the predict interface to be able to predict multiple
rows at once (instead of just one row), and adds a new predictGeneric()
method that allows prediction of arbitrary functions of y*.  One such
implementation -- which returns both the mean and variance of the
prediction values -- is included in the new predictVariance() method.
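A rough usage sketch (the model object, X values, and draw counts here are
hypothetical; the signatures are the ones in the diff below):

    Eigen::MatrixXd X(2, model.K());   // rows to predict (hypothetical values)
    // ... fill X ...
    Eigen::VectorXd yhat = model.predict(X, 1000);          // mean y* for each row
    Eigen::MatrixXd mv   = model.predictVariance(X, 1000);  // col 0: mean, col 1: variance
    // An arbitrary function of y*, e.g. E[exp(y*)], via predictGeneric():
    Eigen::MatrixXd ey   = model.predictGeneric(X,
            [](double y) { return std::exp(y); }, 1000);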

These interface changes, however, break API compatibility, so bumping the
lib version.

linear-draw-test is a new scratch script that verifies that the
distributions are actually what Koop says they should be.
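(Concretely, the script checks that the posterior predictive draws for a set
of rows X follow the multivariate t distribution y* ~ t(X*beta, s2*(I + X V X'), n),
which it also draws directly as a benchmark.)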

This also adds a (non-truncated) multivariate t generator to go along
with multivariateNormal(), and a rmvt scratch script to test it.
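(The construction used: draw z ~ N(0, I) and u ~ chisq(nu); the generator then
returns mu + sqrt(nu/u) * s * L * z, where L L' = Sigma and s is an optional
scale factor; see multivariateT() in the diff below.)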
parent 92f217c0
@@ -12,7 +12,7 @@ set(eris_description "Agent-based economic modelling library")
 # Eris package version
 set(ERIS_VERSION_MAJOR "0")
 set(ERIS_VERSION_MINOR "3")
-set(ERIS_VERSION_PATCH "4")
+set(ERIS_VERSION_PATCH "5")
 set(ERIS_VERSION "${ERIS_VERSION_MAJOR}.${ERIS_VERSION_MINOR}.${ERIS_VERSION_PATCH}")
@@ -32,7 +32,7 @@ set(ERIS_VERSION "${ERIS_VERSION_MAJOR}.${ERIS_VERSION_MINOR}.${ERIS_VERSION_PAT
 # (So something like 3.7.1 indicates the 8th revision of the liberis-3
 # interface, and that code that links against liberis-2.* can link against this
 # version, but code that links against liberis-1.* cannot.)
-set(LIBERIS_CURRENT "3")
+set(LIBERIS_CURRENT "4")
 set(LIBERIS_REVISION "0")
 set(LIBERIS_AGE "0")
......
@@ -300,7 +300,7 @@ class BayesianLinearRestricted : public BayesianLinear {
      * \par Implementation details: drawing beta values
      *
      * In general, a non-truncated multivariate \f$t(\mu, \Sigma, \nu)\f$ can easily be formed
-     * by adding the mean to \f$\sqrt{\nu / \chi^2_{\nu}}\f$ times a draw from the
+     * by adding the mean to \f$\sqrt{\nu / (s^2 \chi^2_{\nu})}\f$ times a draw from the
      * (non-truncated) multivariate \f$N(\mathbf{0}, \mathbf{\Sigma})\f$ distribution (which
      * itself is easily drawn as \f$\mu + \mathbf{L} \mathbf{z}\f$, where \f$\mathbf{L}
      * \mathbf{L}^\top = \mathbf{\Sigma}\f$ (i.e.\ the Cholesky decomposition of
......
// linear-draw-test: scratch script verifying that the predicted distributions
// match what Koop says they should be.
#include <eris/belief/BayesianLinearRestricted.hpp>
#include <eris/Random.hpp>
#include <Eigen/Cholesky>
#include <Eigen/QR>
#include <cmath>
#include <iostream>

using namespace eris::belief;
using namespace Eigen;

int main() {
    VectorXd beta(3);
    beta << 10, 2, -5;
    MatrixXd V(3,3);
    V << 1.7,  0.25, 0.4,
         0.25, 5.5,  1,
         0.4,  1,    8;
    double s2 = 2.0;
    double n = 10;
    BayesianLinearRestricted model(beta, s2, V.fullPivHouseholderQr().inverse(), n);
    model.draw_mode = BayesianLinearRestricted::DrawMode::Gibbs;
    std::cout << "s2=" << model.s2() << ", V:\n" << model.Vinv().fullPivHouseholderQr().inverse() << "\n";

    MatrixXd X(5, 3);
    X <<  1, 4,  0,
          2, 2,  2,
         -3, 5, 17,
          1, 1,  1,
          8, 0,  6;
    VectorXd yposterior = X * model.beta();
    std::cout << "X beta_post: " << yposterior.transpose() << "\n";

    unsigned ndraws = 100000;
    VectorXd s2idraws(ndraws);
    MatrixXd betadraws(model.K(), ndraws);
    MatrixXd ypred(X.rows(), ndraws);
    MatrixXd gammadraws(model.K(), ndraws);

    std::cerr << "Vinv -> inverse -> cholL:\n" << MatrixXd(model.Vinv().fullPivHouseholderQr().inverse().llt().matrixL()) << "\n";
    std::cerr << "VcholL():\n" << model.VcholL() << "\n";
    std::cerr << "VinvCholL():\n" << model.VinvCholL() << "\n";

    MatrixXd gammaL = model.VcholL(); //(model.Vinv().fullPivHouseholderQr().inverse()).llt().matrixL();
    for (unsigned i = 0; i < ndraws; i++) {
        // discard() clears any cached draws, so each predict() call below uses a fresh draw:
        model.discard();
        ypred.col(i) = model.predict(X, 1);
        betadraws.col(i) = model.lastDraw().head(3);
        s2idraws[i] = 1.0 / model.lastDraw()[3];
        gammadraws.col(i) = BayesianLinear::multivariateT(model.beta(), model.n(), gammaL, std::sqrt(model.s2()));
    }

    VectorXd means = ypred.rowwise().mean();
    VectorXd var = (ypred.colwise() - means).rowwise().squaredNorm() / (ndraws-1);

    std::cout << "s^-2 draws mean: " << s2idraws.mean() << " (expect " << 1/s2 << ")\n";
    std::cout << "s^-2 draws var: " << (s2idraws.array() - s2idraws.mean()).matrix().squaredNorm() / (ndraws-1)
        << " (expect " << 2/(n*s2*s2) << ")\n";

    VectorXd betadrawmeans = betadraws.rowwise().mean();
    MatrixXd beta_demeaned = betadraws.colwise() - betadrawmeans;
    MatrixXd betadrawvar = beta_demeaned * beta_demeaned.transpose() / (ndraws-1);
    std::cout << "betamean: " << betadrawmeans.transpose() << " (expect " << model.beta().transpose() << ")\n";
    std::cout << "betavar:\n" << betadrawvar << "\nexpected var:\n"
        << model.n() * model.s2() / (model.n()-2) * model.Vinv().fullPivHouseholderQr().inverse() << "\n";

    VectorXd gammadrawmeans = gammadraws.rowwise().mean();
    MatrixXd gamma_demeaned = gammadraws.colwise() - gammadrawmeans;
    MatrixXd gammadrawvar = gamma_demeaned * gamma_demeaned.transpose() / (ndraws-1);
    std::cout << "gammamean: " << gammadrawmeans.transpose() << " (expect " << model.beta().transpose() << ")\n";
    std::cout << "gammavar:\n" << gammadrawvar << "\nexpected var:\n"
        << model.n() * model.s2() / (model.n()-2) * model.Vinv().fullPivHouseholderQr().inverse() << "\n";

    std::cout << "y* means: " << means.transpose() << "\n";
    std::cout << "y* varis: " << var.transpose() << "\n";

    auto mv = model.predictVariance(X, 10000);
    std::cout << "y*pv means: " << mv.col(0).transpose() << "\n";
    std::cout << "y*pv vars: " << mv.col(1).transpose() << "\n";

    // Theoretical posterior predictive: y* ~ t(X beta, s2 (I + X V X'), n)
    MatrixXd L = (model.s2() * (MatrixXd::Identity(X.rows(), X.rows()) + X * V * X.transpose())).llt().matrixL();
    MatrixXd yshould(X.rows(), ndraws);
    for (unsigned i = 0; i < ndraws; i++) {
        yshould.col(i) = BayesianLinear::multivariateT(X*model.beta(), model.n(), L);
    }
    VectorXd shouldmeans = yshould.rowwise().mean();
    VectorXd shouldvar = (yshould.colwise() - shouldmeans).rowwise().squaredNorm() / (ndraws-1);
    std::cout << "should means: " << shouldmeans.transpose() << "\n";
    std::cout << "should varis: " << shouldvar.transpose() << "\n";
}
// rmvt: scratch script testing the multivariate t generator against R's
// mvtnorm and mnormt implementations.
#include <eris/belief/BayesianLinear.hpp>
#include <eris/Random.hpp>
#include <Eigen/Cholesky>
#include <cerrno>
#include <cmath>
#include <cstring>
#include <ctime>
#include <iostream>
#include <list>
#include <sstream>
extern "C" {
#include <unistd.h>
#include <sys/wait.h>
#include <sys/times.h>
}

using namespace eris::belief;
using namespace Eigen;

// Formats a matrix as an R c(...) literal (in R's column-major order):
std::string matrix_to_Rc(const Ref<const MatrixXd> &m) {
    std::ostringstream str;
    str << "c(";
    bool first = true;
    for (unsigned c = 0; c < m.cols(); c++) for (unsigned r = 0; r < m.rows(); r++) {
        if (first) first = false;
        else str << ",";
        str << m(r,c);
    }
    str << ")";
    return str.str();
}

int main() {
    MatrixXd sigma(5,5);
    sigma << 5, 4,   3,   2,   1,
             4, 6,   0.5, 0,   1,
             3, 0.5, 7,   0,   1.5,
             2, 0,   0,   8,   0.1,
             1, 1,   1.5, 0.1, 9;
    MatrixXd L = sigma.llt().matrixL();
    VectorXd mu(5);
    mu << 8, 1, 1, 150, 888;
    double df = 8;
    std::cout << "mu: " << mu.transpose() << "\n\nsigma:\n" << sigma << "\ndf: " << df << "\n\nmvt:\n====\n";

    clock_t start = std::clock();
    MatrixXd draws(5, 1000000);
    for (unsigned i = 0; i < draws.cols(); i++) {
        draws.col(i) = BayesianLinear::multivariateT(mu, df, L);
    }
    clock_t end = std::clock();

    VectorXd means = draws.rowwise().mean();
    VectorXd var = (draws.colwise() - means).rowwise().squaredNorm() / (draws.cols()-1);
    std::cout << "means:\n" << means.transpose() << "\n\nvariance:\n" << var.transpose() << "\n\n";
    double s = (double)(end - start) / CLOCKS_PER_SEC;
    std::cout << "Elapsed: " << s << " s (" << std::lround(draws.cols() / s) << " draws/s)\n";

    // Build equivalent R one-liners for the mvtnorm and mnormt packages:
    std::ostringstream Rmvtnorm, Rmnormt;
#define BOTH(WHATEVER) Rmvtnorm << WHATEVER; Rmnormt << WHATEVER
    Rmvtnorm << "require(mvtnorm,quietly=T);";
    Rmnormt << "require(mnormt,quietly=T);set.seed(" << (int) eris::Random::rng()() << ");";
    BOTH("S<-matrix(nrow=" << sigma.rows() << "," << matrix_to_Rc(sigma) << ");" <<
         "M<-" << matrix_to_Rc(mu) << ";");
    BOTH("t0<-proc.time()[3];");
    Rmvtnorm << "d<-M+t(rmvt(" << draws.cols() << ",S," << df << "));";
    Rmnormt << "d<-t(rmt(" << draws.cols() << ",M,S," << df << "));";
    BOTH("tZ<-proc.time()[3];");
    BOTH("m<-rowMeans(d);"
         << "v<-apply(d,1,function(x)var(x));"
         << "z<-" << matrix_to_Rc(means) << ";Z<-" << matrix_to_Rc(var) << ";"
         << R"(cat("Means:",m,"\nVariances:",v,"\nEris minus )");
    Rmvtnorm << "mvtnorm"; Rmnormt << "mnormt";
    BOTH(R"( means:",z-m,"\nEris minus )");
    Rmvtnorm << "mvtnorm"; Rmnormt << "mnormt";
    BOTH(R"( variances:",Z-v,"\n\n");)");
    BOTH(R"(cat("Elapsed:",tZ-t0,"s (",ncol(d)/(tZ-t0)," draws/s)\n\n"))");

    std::list<std::pair<std::string, std::string>> commands{
        {"mvtnorm", Rmvtnorm.str()},
        {"mnormt", Rmnormt.str()}};
    for (auto &R : commands) {
        std::cout << "\n\nRunning R with package " << R.first << " for comparison:\n====\n";
        pid_t child = fork();
        if (child == 0) {
            // Child: replace this process with R
            execlp("R", "R", "--slave", "-e", R.second.c_str(), (char*) nullptr);
            // Getting here means exec failed!
            std::cerr << "Failed to execute R: " << std::strerror(errno) << ". Tried to execute:\nR --slave -e '" << R.second << "'\n";
            return errno;
        }
        else if (child == -1) {
            std::cerr << "Fork failed: " << std::strerror(errno) << "\n";
            return errno;
        }
        else {
            // Parent: wait for R to finish
            wait(nullptr);
        }
    }
}
@@ -98,41 +98,97 @@ void BayesianLinear::names(const std::vector<std::string> &names) {
     beta_names_default_ = false;
 }
-double BayesianLinear::predict(const Eigen::Ref<const Eigen::RowVectorXd> &Xi, unsigned int draws) {
+VectorXd BayesianLinear::predict(const Ref<const MatrixXd> &X, unsigned int draws) {
+    VectorXd y;
+    y = predictGeneric(X, [](double y) { return y; }, draws);
+    return y;
+}
+
+MatrixXd BayesianLinear::predictVariance(const Ref<const MatrixXd> &X, unsigned int draws) {
+    if (draws == 1 or (draws == 0 and prediction_draws_.cols() == 1))
+        throw std::logic_error("predictVariance() cannot calculate variance using only 1 draw");
+
+    std::vector<std::function<double(double)>> g;
+    g.emplace_back([](double y) { return y; }); // Mean
+    g.emplace_back([](double y) { return y*y; }); // E[y^2]
+    MatrixXd results = predictGeneric(X, g, draws);
+    results.col(1) =
+        draws / (double)(draws-1) * // Correct for sample variance bias
+        (results.col(1) - results.col(0).array().square().matrix()); // E[y^2] - E[y]^2
+    return results;
+}
+
+MatrixXd BayesianLinear::predictGeneric(const Ref<const MatrixXd> &X, const std::vector<std::function<double(double y)>> &g, unsigned draws) {
     if (noninformative_)
-        throw std::logic_error("Cannot call predict() on a noninformative model");
+        throw std::logic_error("Cannot call predict using a noninformative model");
+    if (g.empty())
+        throw std::logic_error("predictGeneric() called without any g() functions");
-    if (draws == 0) draws = mean_beta_draws_ > 0 ? mean_beta_draws_ : 1000;
+    if (draws == 0) draws = prediction_draws_.cols() > 0 ? prediction_draws_.cols() : 1000;
-    // If fewer draws were requested, we need to start over
-    if (draws < mean_beta_draws_) {
-        mean_beta_draws_ = 0;
+    if (draws > prediction_draws_.cols()) {
+        unsigned i = prediction_draws_.cols();
+        prediction_draws_.conservativeResize(K_+1, draws);
+        for (; i < draws; i++) {
+            prediction_draws_.col(i) = draw();
+        }
     }
-    if (draws > mean_beta_draws_) {
-        // First sum up new draws to make up the difference:
-        VectorXd new_beta = VectorXd::Zero(K_);
-        for (long i = mean_beta_draws_; i < draws; i++) {
-            new_beta += draw().head(K_);
-        }
-        // Turn into a mean:
-        new_beta /= draws;
+    // Draw new error terms, if needed.
+    unsigned err_cols = prediction_errors_.cols(), err_rows = prediction_errors_.rows();
+    // Need more rows:
+    if (err_rows < X.rows()) {
+        err_rows = X.rows();
+        // We need new rows, but have to make sure that errors doesn't have more columns than
+        // draws (because we can't draw new errors for those columns)
+        if (err_cols > prediction_draws_.cols()) err_cols = prediction_draws_.cols();
+    }
-        // If we had no draws at all before, just use the new vector
-        if (mean_beta_draws_ == 0) {
-            mean_beta_.swap(new_beta);
-        }
-        else {
-            // Otherwise we need to combine means by calculating a weighted mean of the two means,
-            // weighted by each mean's proportion of draws:
-            double w_existing = (double) mean_beta_draws_ / draws;
-            mean_beta_ = w_existing * mean_beta_ + (1 - w_existing) * new_beta;
+    // Need more columns:
+    if (err_cols < prediction_draws_.cols()) {
+        err_cols = prediction_draws_.cols();
+    }
+
+    auto &rng = Random::rng();
+    if (err_cols != prediction_errors_.cols() or err_rows != prediction_errors_.rows()) {
+        // Keep track of where we need to start filling:
+        unsigned startc = prediction_errors_.cols();
+        unsigned startr = prediction_errors_.rows();
+        prediction_errors_.conservativeResize(err_rows, err_cols);
+        for (unsigned c = (startr < err_rows ? 0 : startc); c < err_cols; c++) {
+            // For the first startc columns, we need to draw values for any new rows; for startc and
+            // beyond we need to draw values for every row:
+            std::normal_distribution<double> err(0, std::sqrt(prediction_draws_(K_, c)));
+            for (unsigned r = (c < startc ? startr : 0); r < err_rows; r++) {
+                prediction_errors_(r, c) = err(rng);
+            }
         }
     }
-    mean_beta_draws_ = draws;
+    MatrixXd results = MatrixXd::Zero(X.rows(), g.size());
+    VectorXd ydraw;
+    for (unsigned i = 0; i < draws; i++) {
+        ydraw = X * prediction_draws_.col(i).head(K_) + prediction_errors_.col(i).head(X.rows());
+        for (unsigned t = 0; t < ydraw.size(); t++) for (unsigned gi = 0; gi < g.size(); gi++) {
+            results(t, gi) += g[gi](ydraw[t]);
+        }
+    }
-    return Xi * mean_beta_;
+    return results / draws;
 }
+
+MatrixXd BayesianLinear::predictGeneric(const Ref<const MatrixXd> &X, const std::function<double(double)> &g, unsigned draws) {
+    std::vector<std::function<double(double)>> gvect;
+    gvect.push_back(g);
+    return predictGeneric(X, gvect, draws);
+}
+
+MatrixXd BayesianLinear::predictGeneric(const Ref<const MatrixXd> &X, const std::function<double(double)> &g0, const std::function<double(double)> &g1, unsigned draws) {
+    std::vector<std::function<double(double)>> gvect;
+    gvect.push_back(g0);
+    gvect.push_back(g1);
+    return predictGeneric(X, gvect, draws);
+}
 const VectorXd& BayesianLinear::draw() {
@@ -166,7 +222,7 @@ const VectorXd& BayesianLinear::draw() {
     // \]
     // To draw this, first draw a gamma-distributed "h" value (store its inverse)
-    last_draw_[K_] = 1.0 / std::gamma_distribution<double>(n_/2, 2/(s2_*n_))(rng);
+    last_draw_[K_] = n_ * s2_ / std::chi_squared_distribution<double>(n_)(rng);
     // Now use that to draw a multivariate normal conditional on h, with mean beta and variance
     // h^{-1} V; this is the beta portion of the draw:
@@ -188,13 +244,22 @@ VectorXd BayesianLinear::multivariateNormal(const Ref<const VectorXd> &mu, const
     return mu + L * (s * z);
 }
+VectorXd BayesianLinear::multivariateT(const Ref<const VectorXd> &mu, double nu, const Ref<const MatrixXd> &L, double s) {
+    VectorXd y(multivariateNormal(VectorXd::Zero(mu.size()), L, s));
+    double u = std::chi_squared_distribution<double>(nu)(Random::rng());
+    return mu + std::sqrt(nu / u) * y;
+}
+
 const VectorXd& BayesianLinear::lastDraw() const {
     return last_draw_;
 }
 void BayesianLinear::discard() {
     NO_EMPTY_MODEL;
-    mean_beta_draws_ = 0;
+    if (prediction_draws_.cols() > 0) prediction_draws_.resize(NoChange, 0);
+    if (prediction_errors_.cols() > 0) prediction_errors_.resize(0, 0);
 }
 std::ostream& operator<<(std::ostream &os, const BayesianLinear &b) {
@@ -294,7 +359,7 @@ void BayesianLinear::updateInPlace(const Ref<const VectorXd> &y, const Ref<const
         MatrixXd XtX = (noninf_X_->transpose() * *noninf_X_).selfadjointView<Lower>();
         auto qr = XtX.fullPivHouseholderQr();
         if (qr.rank() >= K()) {
-            beta_ = noninf_X_->jacobiSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(*noninf_y_);
+            beta_ = noninf_X_->jacobiSvd(ComputeThinU | ComputeThinV).solve(*noninf_y_);
 #ifdef EIGEN_HAVE_RVALUE_REFERENCES
             V_inv_ = std::move(XtX);
 #else
@@ -405,13 +470,13 @@ void BayesianLinear::weakenInPlace(const double stdev_scale) {
         if (V_chol_L_.unique())
             *V_chol_L_ *= stdev_scale;
         else
-            V_chol_L_.reset(new Eigen::MatrixXd(*V_chol_L_ * stdev_scale));
+            V_chol_L_.reset(new MatrixXd(*V_chol_L_ * stdev_scale));
     }
     if (V_inv_chol_L_) {
         if (V_inv_chol_L_.unique())
             *V_inv_chol_L_ /= stdev_scale;
         else
-            V_inv_chol_L_.reset(new Eigen::MatrixXd(*V_inv_chol_L_ / stdev_scale));
+            V_inv_chol_L_.reset(new MatrixXd(*V_inv_chol_L_ / stdev_scale));
     }
     return;
@@ -435,7 +500,7 @@ const VectorXd& BayesianLinear::noninfYData() const {
 void BayesianLinear::reset() {
     if (last_draw_.size() > 0) last_draw_.resize(0);
-    mean_beta_draws_ = 0;
+    discard();
 }
 BayesianLinear::draw_failure::draw_failure(const std::string &what) : std::runtime_error(what) {}
......
@@ -209,7 +209,10 @@ const VectorXd& BayesianLinearRestricted::drawGibbs() {
             : new MatrixXdR(s * R() * Ainv));
     const auto &D = *gibbs_D_;
-    if (not gibbs_r_Rbeta_) gibbs_r_Rbeta_.reset(new VectorXd(r() - R() * beta_));
+    if (not gibbs_r_Rbeta_) gibbs_r_Rbeta_.reset(
+            restrict_size_ == 0
+            ? new VectorXd(0)
+            : new VectorXd(r() - R() * beta_));
     const auto &r_minus_Rbeta_ = *gibbs_r_Rbeta_;
     if (not gibbs_last_z_) {
@@ -252,6 +255,7 @@ const VectorXd& BayesianLinearRestricted::drawGibbs() {
         if (restrict_size_ == 0) {
             // If no restrictions, simple: just draw it from the unconditional distribution
             // NB: gamma(n/2,2) === chisq(v)
+            // (Note: s2_ gets incorporated into this later)
             sigma = std::sqrt(n_ / chisq(rng));
         }
         else {
......