Commit 137f7496 authored by Jason Rhinelander

Updated creativity-results to support policies

There are still problems here: this fixes enough for the data summary
to support policy filtering.
parent 89d5145b
......@@ -12,18 +12,18 @@ Treatment::Treatment(const std::string &filename) : Treatment(CSVParser(filename
void Treatment::readCSV(CSVParser &&csv) {
// The data contains values like pre.whatever, piracy.whatever, piracy.SR.whatever,
// public.whatever, public.SR.whatever. We need to convert those into five rows:
// - one with `whatever' set to pre.whatever and piracy=public=SR=0, LR=1
// - one with `whatever' set to piracy.SR.whatever, piracy=1, public=0, SR=1, LR=0
// - one with `whatever' set to piracy.whatever, piracy=1, public=0, SR=0, LR=1
// - one with `whatever' set to public.SR.whatever, piracy=0, public=1, SR=1, LR=0
// - one with `whatever' set to public.whatever, piracy=0, public=1, SR=0, LR=1
// policy.whatever, policy.SR.whatever. We need to convert those into five rows:
// - one with `whatever' set to pre.whatever and piracy=policy=SR=0, LR=1
// - one with `whatever' set to piracy.SR.whatever, piracy=1, policy=0, SR=1, LR=0
// - one with `whatever' set to piracy.whatever, piracy=1, policy=0, SR=0, LR=1
// - one with `whatever' set to policy.SR.whatever, piracy=0, policy=1, SR=1, LR=0
// - one with `whatever' set to policy.whatever, piracy=0, policy=1, SR=0, LR=1
//
// The data might not, however, have any piracy and/or public and/or short run data, in which
// The data might not, however, have any piracy and/or policy and/or short run data, in which
// case we omit the relevant row(s).
//
// This maps every CSV field (by name) into a column. The mapping isn't unique, however: any
// pre.whatever, piracy.whatever, and public.whatever all map to "whatever", albeit with
// pre.whatever, piracy.whatever, and policy.whatever all map to "whatever", albeit with
// different rows, as described above.
//
// Fields starting with "param." are left as-is; anything else must have one of the "prefix."
......@@ -40,9 +40,9 @@ void Treatment::readCSV(CSVParser &&csv) {
std::string data_field;
if (f.substr(0, 4) == "pre.") { data_field = f.substr(4); has_pre_ = true; }
else if (f.substr(0, 10) == "piracy.SR.") { data_field = f.substr(10); has_piracy_sr_ = true; }
else if (f.substr(0, 10) == "public.SR.") { data_field = f.substr(10); has_public_sr_ = true; }
else if (f.substr(0, 10) == "policy.SR.") { data_field = f.substr(10); has_policy_sr_ = true; }
else if (f.substr(0, 7) == "piracy.") { data_field = f.substr(7); has_piracy_ = true; }
else if (f.substr(0, 7) == "public.") { data_field = f.substr(7); has_public_ = true; }
else if (f.substr(0, 7) == "policy.") { data_field = f.substr(7); has_policy_ = true; }
else if (f.substr(0, 6) == "param.") { data_field = f; } // leave leading param.
else { throw std::runtime_error("CSV file has invalid/unknown field `" + f + "': fields must have a (known) prefix"); }
......@@ -57,19 +57,19 @@ void Treatment::readCSV(CSVParser &&csv) {
}
// The pre-data plus each treatment effect contributes an output row
rows_per_sim_ = has_pre_ + has_piracy_ + has_piracy_sr_ + has_public_ + has_public_sr_;
rows_per_sim_ = has_pre_ + has_piracy_ + has_piracy_sr_ + has_policy_ + has_policy_sr_;
if (rows_per_sim_ == 0) throw std::runtime_error("CSV file contains no usable data (no pre, piracy, or public observations found)");
if (rows_per_sim_ == 0) throw std::runtime_error("CSV file contains no usable data (no pre, piracy, or policy observations found)");
// Check required data
requirePre(require_pre_);
requirePiracy(require_piracy_);
requirePublic(require_public_);
requirePolicy(require_policy_);
requireSR(require_sr_);
// Add dummy columns (even if the data means they will always be 0/1)
data_column_.insert({"piracy", next_col++}); // piracy dummy
data_column_.insert({"public", next_col++}); // public sharing dummy
data_column_.insert({"policy", next_col++}); // policy sharing dummy
data_column_.insert({"SR", next_col++}); // short-run stage dummy
data_column_.insert({"LR", next_col++}); // long-run stage dummy
......@@ -90,10 +90,10 @@ void Treatment::readCSV(CSVParser &&csv) {
if (has_piracy_sr_) generateRow(csv, data_.row(pos++), "piracy.SR.");
// Third: long run piracy
if (has_piracy_) generateRow(csv, data_.row(pos++), "piracy.");
// Fourth: short run public
if (has_public_sr_) generateRow(csv, data_.row(pos++), "public.SR.");
// Fifth: long run public
if (has_public_) generateRow(csv, data_.row(pos++), "public.");
// Fourth: short run policy
if (has_policy_sr_) generateRow(csv, data_.row(pos++), "policy.SR.");
// Fifth: long run policy
if (has_policy_) generateRow(csv, data_.row(pos++), "policy.");
source_.push_back(csv.rowSkipped().at("source"));
}
......@@ -124,7 +124,7 @@ void Treatment::generateRow(
for (const auto &field : data_column_) {
// Look for special dummies:
if (field.first == "piracy") newrow[field.second] = (prefix.substr(0, 7) == "piracy.");
else if (field.first == "public") newrow[field.second] = (prefix.substr(0, 7) == "public.");
else if (field.first == "policy") newrow[field.second] = (prefix.substr(0, 7) == "policy.");
else if (field.first == "LR") newrow[field.second] = (prefix.find(".SR.") == prefix.npos);
else if (field.first == "SR") newrow[field.second] = (prefix.find(".SR.") != prefix.npos);
else {
......@@ -144,12 +144,12 @@ void Treatment::require##Meth(bool req) { \
}
// Instantiate Treatment::require<Meth>(bool) for each data category via the REQUIRE macro.
// NOTE(review): the macro body is not fully visible here; presumably the second argument
// selects the matching has_*/require_* members and the third is the label used in the error
// message -- confirm against the macro definition.
REQUIRE(Pre, pre, "pre-piracy")
REQUIRE(Piracy, piracy, "piracy")
REQUIRE(Public, public, "public")
REQUIRE(Policy, policy, "policy")
// Stores the short-run requirement flag and, if data is already available (have_data_),
// enforces it immediately by throwing std::runtime_error when a treatment has long-run data
// without matching short-run data. When no data is available yet only the flag is recorded;
// readCSV() calls this again with the stored flag after parsing.
void Treatment::requireSR(bool req) {
require_sr_ = req;
// Nothing to validate unless data has been read and the requirement is enabled
if (have_data_ and require_sr_) {
// Short-run data is only demanded for a treatment that has long-run data; a treatment
// that is absent entirely passes these checks.
if (has_piracy_ and not has_piracy_sr_) throw std::runtime_error("CSV file has long-run but no short-run piracy data");
if (has_public_ and not has_public_sr_) throw std::runtime_error("CSV file has long-run but no short-run public data");
if (has_policy_ and not has_policy_sr_) throw std::runtime_error("CSV file has long-run but no short-run policy data");
}
}
......
......@@ -6,8 +6,8 @@
namespace creativity { namespace data {
/** This class converts raw simulation data into multiple data rows, with piracy/public, SR/LR as
* treatment effects on the base (pre-piracy, pre-public) row. It additionally supports filtering
/** This class converts raw simulation data into multiple data rows, with piracy/policy, SR/LR as
* treatment effects on the base (pre-piracy, pre-policy) row. It additionally supports filtering
* the resulting data--for instance, only using observations that still exhibit some writing during
* piracy.
*/
......@@ -35,18 +35,18 @@ class Treatment {
*/
void requirePiracy(bool require = true);
/// Like requirePiracy(), but for public sharing data
void requirePublic(bool require = true);
/// Like requirePiracy(), but for policy data
void requirePolicy(bool require = true);
/// Like requirePiracy(), but for pre-piracy data
void requirePre(bool require = true);
/** Requires short-run data for long-run piracy/public data that exists in the source data.
/** Requires short-run data for long-run piracy/policy data that exists in the source data.
* In particular, if this option is enabled, the data must contain short-run observations
* for each category with equivalent long-run observations: that is, if there is long-run
* piracy data, there must also be short-run piracy data, and likewise for public sharing
* data. If the source data does not contain long-run piracy data, this option will not
* require short-run piracy data.
* piracy data, there must also be short-run piracy data, and likewise for policy data. If
* the source data does not contain long-run piracy data, this option will not require
* short-run piracy data.
*
* Like requirePiracy(), this throws immediately if data has already been parsed; if not,
* the exception will be raised if attempting to read data that doesn't contain the required
......@@ -60,20 +60,20 @@ class Treatment {
/// True if the source data has piracy data
const bool& hasPiracy() const { return has_piracy_; }
/// True if the source data has public data
const bool& hasPublic() const { return has_public_; }
/// True if the source data has policy data
const bool& hasPolicy() const { return has_policy_; }
/// True if the source data has short-run piracy data
const bool& hasPiracySR() const { return has_piracy_sr_; }
/// True if the source data has short-run public data
const bool& hasPublicSR() const { return has_public_sr_; }
/// True if the source data has short-run policy data
const bool& hasPolicySR() const { return has_policy_sr_; }
/** True if the source data has short-run data for each type of associated long-run data.
* That is, if the data has piracy data, it must also have short-run piracy data; likewise
* for public data.
* for policy data.
*
* A treatment without long-run data imposes no requirement, so this also returns true when
* the source data contains no long-run treatment data at all.
*/
bool hasShortrun() const { return (hasPublicSR() or not hasPublic()) and (hasPiracySR() or not hasPiracy()); }
bool hasShortrun() const { return (hasPolicySR() or not hasPolicy()) and (hasPiracySR() or not hasPiracy()); }
/// The number of data rows generated per simulation input: one for the pre-data (when
/// present) plus one for each treatment effect (short-run and/or long-run) found in the
/// source data.
const unsigned int& rowsPerSimulation() const { return rows_per_sim_; }
......@@ -101,7 +101,7 @@ class Treatment {
/** Accesses the map of field name to column indices. Data fields (e.g. "books_written")
* are not prefixed with "pre.", "piracy.", etc.; parameters (e.g. "param.readers") are
* prefixed; there are also dummies "piracy", "public", "SR", and "LR".
* prefixed; there are also dummies "piracy", "policy", "SR", and "LR".
*/
const std::unordered_map<std::string, unsigned>& columns() const;
......@@ -126,11 +126,11 @@ class Treatment {
has_pre_{false}, ///< True if the data contains pre-piracy, non-treatment rows
has_piracy_{false}, ///< True if the data contains LR piracy treatment rows
has_piracy_sr_{false}, ///< True if the data contains SR piracy treatment rows
has_public_{false}, ///< True if the data contains LR public treatment rows
has_public_sr_{false}, ///< True if the data contains SR public treatment rows
has_policy_{false}, ///< True if the data contains LR policy treatment rows
has_policy_sr_{false}, ///< True if the data contains SR policy treatment rows
require_pre_{false}, ///< True if pre data is required
require_piracy_{false}, ///< True if piracy data is required
require_public_{false}, ///< True if public data is required
require_policy_{false}, ///< True if policy data is required
require_sr_{false}; ///< True if short-run data is required for each type of long-run data
/// The number of treatment row observations per source data rows (i.e. per simulation)
unsigned int rows_per_sim_{0};
......@@ -145,7 +145,7 @@ class Treatment {
*
* \param csv the CSVParser positioned at the desired row.
* \param newrow the matrix row in which values will be set
* \param prefix the prefix (such as "pre." or "public.SR.") to add to non-param. data
* \param prefix the prefix (such as "pre." or "policy.SR.") to add to non-param. data
* columns.
*/
void generateRow(const CSVParser &csv,
......
......@@ -7,11 +7,11 @@ using namespace Eigen;
TreatmentFilter::TreatmentFilter(const Treatment &source,
std::function<bool(const Properties&)> filter,
std::function<bool(bool pre, bool piracy, bool public_sharing, bool short_run)> stage_filter) {
std::function<bool(bool pre, bool piracy, bool policy, bool short_run)> stage_filter) {
std::vector<unsigned> stages;
stages.reserve(source.rowsPerSimulation());
unsigned stage_i = 0;
// Data is always added in this order: pre, piracy.SR, piracy.LR, public.SR, public.LR
// Data is always added in this order: pre, piracy.SR, piracy.LR, policy.SR, policy.LR
has_pre_ = false; // Everything else defaults to false already
if (source.hasPre()) {
if (stage_filter(true, false, false, false)) {
......@@ -34,16 +34,16 @@ TreatmentFilter::TreatmentFilter(const Treatment &source,
}
stage_i++;
}
if (source.hasPublicSR()) {
if (source.hasPolicySR()) {
if (stage_filter(false, false, true, true)) {
has_public_sr_ = true;
has_policy_sr_ = true;
stages.push_back(stage_i);
}
stage_i++;
}
if (source.hasPublic()) {
if (source.hasPolicy()) {
if (stage_filter(false, false, true, false)) {
has_public_ = true;
has_policy_ = true;
stages.push_back(stage_i);
}
stage_i++;
......@@ -62,8 +62,8 @@ TreatmentFilter::TreatmentFilter(const Treatment &source,
if (source.hasPre()) props.pre.reset(new StageProperties(source, i + j++));
if (source.hasPiracySR()) props.piracy_SR.reset(new StageProperties(source, i + j++));
if (source.hasPiracy()) props.piracy.reset(new StageProperties(source, i + j++));
if (source.hasPublicSR()) props.public_sharing_SR.reset(new StageProperties(source, i + j++));
if (source.hasPublic()) props.public_sharing.reset(new StageProperties(source, i + j++));
if (source.hasPolicySR()) props.policy_SR.reset(new StageProperties(source, i + j++));
if (source.hasPolicy()) props.policy.reset(new StageProperties(source, i + j++));
if (filter(props)) {
if (datapos >= data_.rows()) data_.conservativeResize(data_.rows() + rowincr, NoChange);
......
......@@ -28,7 +28,7 @@ namespace creativity { namespace data {
* TreatmentFilter pre_and_piracy(
* data,
* [](const TreatmentFilter::Properties&) { return true; } // No per-simulation filtering
* [](bool pre, bool piracy, bool public_sharing, bool short_run) {
* [](bool pre, bool piracy, bool policy, bool short_run) {
* return pre or (piracy and not short_run);
* }
* );
......@@ -67,11 +67,11 @@ class TreatmentFilter : public Treatment {
* depends on whether or not the field is in the source data, *not* whether or not
* the stage will be included in the filtered data. */
std::unique_ptr<const StageProperties> piracy;
/** A pointer to the Properties values for the LR public row associated with this
/** A pointer to the Properties values for the LR policy row associated with this
* row. Will be a nullptr if there is no such row. Note that whether this is set
* depends on whether or not the field is in the source data, *not* whether or not
* the stage will be included in the filtered data. */
std::unique_ptr<const StageProperties> public_sharing;
std::unique_ptr<const StageProperties> policy;
/** A pointer to the Properties values for the SR piracy row associated with this
* row. Will be a nullptr if there is no such row. Note that whether this is set
* depends on whether or not the field is in the source data, *not* whether or not
......@@ -81,7 +81,7 @@ class TreatmentFilter : public Treatment {
* row. Will be a nullptr if there is no such row. Note that whether this is set
* depends on whether or not the field is in the source data, *not* whether or not
* the stage will be included in the filtered data. */
std::unique_ptr<const StageProperties> public_sharing_SR;
std::unique_ptr<const StageProperties> policy_SR;
/// The source data simulation filename.
std::string source;
};
......@@ -100,7 +100,7 @@ class TreatmentFilter : public Treatment {
*/
TreatmentFilter(const Treatment &source,
std::function<bool(const Properties&)> filter,
std::function<bool(bool pre, bool piracy, bool public_sharing, bool short_run)> stage_filter = [](bool,bool,bool,bool) { return true; }
std::function<bool(bool pre, bool piracy, bool policy, bool short_run)> stage_filter = [](bool,bool,bool,bool) { return true; }
);
};
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment