1 | /*
|
---|
2 | * 2012.6.14 a faster version of vervet/src/mapper/CalculateMedianModeFromSAMtoolsDepthOutput.py
|
---|
3 | *
|
---|
4 | */
|
---|
5 |
|
---|
6 | #include <iostream>
|
---|
7 | #include <string>
|
---|
8 | #include <cmath> //sqrt
|
---|
9 | #include <assert.h>
|
---|
10 | #include <stdio.h>
|
---|
11 | #include <stdlib.h>
|
---|
12 | #include <ext/hash_map> //for hash_map
|
---|
13 | #include <boost/program_options.hpp> //for program options
|
---|
14 | #include <fstream>
|
---|
15 | #include <boost/tokenizer.hpp>
|
---|
16 |
|
---|
17 | #include "armadillo" //for mean/median calculation
|
---|
18 |
|
---|
19 | #include <boost/iostreams/filtering_streambuf.hpp>
|
---|
20 | #include <boost/iostreams/copy.hpp>
|
---|
21 | #include <boost/iostreams/filter/gzip.hpp>
|
---|
22 | #include <boost/random/mersenne_twister.hpp>
|
---|
23 | #include <boost/random/uniform_real.hpp>
|
---|
24 | #include <boost/random/variate_generator.hpp>
|
---|
25 | //#include <boost/generator_iterator.hpp>
|
---|
26 |
|
---|
27 | #include <boost/accumulators/accumulators.hpp>
|
---|
28 | #include <boost/accumulators/statistics/stats.hpp>
|
---|
29 | #include <boost/accumulators/statistics/mean.hpp>
|
---|
30 | #include <boost/accumulators/statistics/median.hpp>
|
---|
31 |
|
---|
32 | // This is a typedef for a random number generator.
|
---|
33 | // Try boost::mt19937 or boost::ecuyer1988 instead of boost::minstd_rand
|
---|
34 | typedef boost::mt19937 base_generator_type;
|
---|
35 |
|
---|
36 | using namespace arma;
|
---|
37 | using namespace std;
|
---|
38 | using namespace boost;
|
---|
39 | namespace ba = boost::accumulators;
|
---|
40 | namespace po = boost::program_options;
|
---|
41 |
|
---|
42 | class CalculateMedianMeanOfInputColumn{
|
---|
43 | string inputFname;
|
---|
44 | std::ifstream inputFile;
|
---|
45 | boost::iostreams::filtering_streambuf<boost::iostreams::input> input;
|
---|
46 | string outputFname;
|
---|
47 | std::ofstream outputFile;
|
---|
48 | int alignmentID;
|
---|
49 | float fractionToSample;
|
---|
50 | int noOfLinesInHeader;
|
---|
51 | int whichColumn;
|
---|
52 | int maxNumberOfSamplings;
|
---|
53 | arma::mat statList; //armadillo matrix storing all the sampled stats
|
---|
54 | __gnu_cxx::hash_map <int, int> stat2Occurrence;
|
---|
55 | int modeStat;
|
---|
56 | // Define an accumulator set for calculating the mean and the median
|
---|
57 | ba::accumulator_set<double, ba::stats<ba::tag::mean, ba::tag::median > > acc;
|
---|
58 |
|
---|
59 | public:
|
---|
60 | CalculateMedianMeanOfInputColumn(string _inputFname, string _outputFname, int _alignmentID,
|
---|
61 | float _fractionToSample, int _noOfLinesInHeader, int _whichColumn, int _maxNumberOfSamplings);
|
---|
62 | ~CalculateMedianMeanOfInputColumn();
|
---|
63 | void run();
|
---|
64 | };
|
---|