// series-quantiles.cpp
#include "creativity/data/CSVParser.hpp"
#include "creativity/cmdargs/SeriesQuantiles.hpp"
#include "creativity/data/quantiles.hpp"
#include <boost/filesystem/operations.hpp>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

// Forward declaration of creativity::state::State; the code visible in this file does not use it
// directly.
namespace creativity { namespace state { class State; } }

// Bring the project namespaces into scope so the names used in main() can be written unqualified.
// NOTE(review): `eris` is not declared in this file, so it presumably comes from one of the
// headers included above -- confirm.
using namespace creativity;
using namespace creativity::data;
using namespace eris;

// Shorthand alias for the Boost.Filesystem namespace.
namespace fs = boost::filesystem;

int main(int argc, char *argv[]) {
18
    cmdargs::SeriesQuantiles args;
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
    try {
        args.parse(argc, argv);
    }
    catch (const std::exception &e) {
        std::cerr << "\n" << e.what() << "\n\n";
        exit(5);
    }

    std::istringstream iss(args.quantiles);
    std::set<double> quantiles;
    bool invalid = false;
    std::string quantile;
    while (std::getline(iss, quantile, ',')) {
        std::size_t pos;
        if (quantile == "max" or quantile == "maximum") quantiles.insert(1);
        else if (quantile == "min" or quantile == "minimum") quantiles.insert(0);
        else if (quantile == "median") quantiles.insert(0.5);
        else {
            try {
                double q = std::stod(quantile, &pos);
                if (pos != quantile.size() or q < 0 or q > 1) throw std::invalid_argument("invalid quantile value");
                quantiles.insert(q);
            } catch (const std::invalid_argument&) {
                std::cerr << "Error: requested quantile `" << quantile << "' is invalid\n";
                invalid = true;
            }
        }
    }

    if (invalid) {
        std::cerr << "Invalid quantile(s) provided; aborting.\n\n";
        exit(1);
    }
52 53 54 55
    else if (quantiles.empty()) {
        std::cerr << "No quantiles provided; aborting.\n\n";
        exit(1);
    }
56 57 58 59 60

    std::string output_header;
    {
        std::ostringstream headerss;
        headerss << "t";
61
        for (auto q : quantiles) headerss << "," << quantile_field(q);
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
        headerss << "\n";
        output_header = headerss.str();
    }

    if (args.output_prefix == args.output_unprefix) {
        std::cerr << "Invalid arguments: --prefix and --unprefix cannot be the same.\n\n";
        exit(4);
    }

    if (args.input.empty()) {
        std::cerr << "No series input files specified!\n\n";
        exit(2);
    }

    std::vector<std::string> output;
    output.reserve(args.input.size());
    for (const auto &input : args.input) {
        fs::path input_path(input);
        if (not fs::is_regular_file(input_path)) {
            std::cerr << "Error: input file `" << input << "' does not exist or is not a regular file\n";
            exit(3);
        }
        fs::path parent = input_path.parent_path();
        std::string filename = input_path.filename().string();
        bool changed_name = false;
        if (not args.output_unprefix.empty() and filename.substr(0, args.output_unprefix.size()) == args.output_unprefix) {
            filename = filename.substr(args.output_unprefix.size());
            changed_name = true;
        }
        if (not args.output_prefix.empty()) {
            filename = args.output_prefix + filename;
            changed_name = true;
        }
        if (not changed_name) {
            std::cerr << "Error: --prefix/--unprefix settings didn't imply a different output filename for `" << input << "': aborting.\n\n";
            exit(6);
        }
        fs::path output_path = parent/filename;
        std::string output_file = output_path.string();

        std::cout << input << " -> " << output_file << "..." << std::flush;

        CSVParser parser(input);
        std::ostringstream output_data;
        output_data << output_header;
        try {
108
            if (parser.fields().size() == 0) throw std::invalid_argument("no data fields found (not even t)");
109
            if (parser.fields()[0] != "t") throw std::invalid_argument("first field != t");
110
            if (parser.fields().size() < 2) throw std::invalid_argument("file contains no data (only a t column was found)");
111

112 113 114 115 116 117
            for (const auto &row : parser) {
                // Extract finite values (ignore any NaNs or infinities)
                std::vector<double> finite_values;
                std::copy_if(row.begin()+1, row.end(), std::back_inserter(finite_values), [](const double &v) { return std::isfinite(v); });
                if (finite_values.empty()) continue; // Completely skip time periods with zero finite values (often initial rows, possibly others)
                std::sort(finite_values.begin(), finite_values.end());
118

119
                output_data << row[0];
120
                for (auto q : quantiles) {
121
                    output_data << "," << double_str(data::quantile(finite_values.begin(), finite_values.end(), q), args.double_precision);
122
                }
123
                output_data << "\n";
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
            }
        }
        catch (const std::invalid_argument &e) {
            std::cerr << "\n\nError: input file `" << input << "' does not appear to be a valid creativity-series file: " << e.what() << ".  Aborting.\n\n";
            exit(8);
        }

        if (fs::exists(output_path)) {
            if (fs::equivalent(output_path, input_path)) {
                std::cerr << "\n\nError: output file and input file are the same for input file `" << input << "'; aborting.\n\n";
                exit(7);
            }
            else if (not args.overwrite) {
                std::cerr << "\n\nError: output file `" << output_file << "' already exists, and --overwrite was not specified.  Aborting.\n\n";
                exit(7);
            }
        }

        std::ofstream out(output_file, std::ios::out | std::ios::trunc);
        out << output_data.str();
        out.close();
        std::cout << " done.\n";
    }
}