#include <cstddef>
#include <cstring>
#include <iostream>
#include <memory>
#include <utility>

#include <boost/regex.hpp>
|
|---|
| 4 |
|
|---|
| 5 | int main(int argc, char* argv[])
|
|---|
| 6 | {
|
|---|
| 7 | std::istream& inputstream = std::cin;
|
|---|
| 8 | boost::regex regex(argv[1]);
|
|---|
| 9 |
|
|---|
| 10 | std::size_t buffersize = 4; // for demonstration, normally use something like 4096
|
|---|
| 11 | std::size_t used_buffer = 0;
|
|---|
| 12 | // use unique pointer instead of "plain" pointer so that there is no memory leak in case of exception
|
|---|
| 13 | std::unique_ptr<char[]> buffer(new char[buffersize]);
|
|---|
| 14 | do
|
|---|
| 15 | {
|
|---|
| 16 | inputstream.read(buffer.get() + used_buffer, buffersize - used_buffer);
|
|---|
| 17 | char const* const buffer_end = buffer.get() + used_buffer + inputstream.gcount();
|
|---|
| 18 | char const* buffer_handled_until = buffer.get();
|
|---|
| 19 |
|
|---|
| 20 | // the whole following thing could be much easier by using a bidirectional input iterator here, but:
|
|---|
| 21 | // do not do this because input "file" could be a named pipe, stream or similar (no backwards iterating would be possible!)
|
|---|
| 22 | // so you have to manage the buffer (and release parts of it) yourself
|
|---|
| 23 | // for a more efficient solution (hopefully in the near future) see <https://svn.boost.org/trac/boost/ticket/11776>
|
|---|
| 24 | boost::cregex_iterator curr_match(buffer.get(), buffer_end, regex, boost::match_default | boost::match_partial);
|
|---|
| 25 | // add element to set when ...
|
|---|
| 26 | while (curr_match != boost::cregex_iterator() &&
|
|---|
| 27 | curr_match->begin()->matched && // ... match is a full match and ...
|
|---|
| 28 | (!inputstream || // ... when file is at end or ...
|
|---|
| 29 | // (see next line) when match does not touch end of buffer (otherwise element could be longer, e. g. partial match)
|
|---|
| 30 | !boost::regex_match(curr_match->begin()->first, buffer_end, regex, boost::match_default | boost::match_partial)))
|
|---|
| 31 | {
|
|---|
| 32 | std::cout << curr_match->str() << "\n";
|
|---|
| 33 | ++curr_match;
|
|---|
| 34 | }
|
|---|
| 35 |
|
|---|
| 36 | /* while (curr_match != boost::cregex_iterator())
|
|---|
| 37 | {
|
|---|
| 38 | std::cout << curr_match->str() << " " << curr_match->size() << " " << (*curr_match)[1].matched << " " << ((*curr_match)[1].second - (*curr_match)[1].first) << "\n";
|
|---|
| 39 | ++curr_match;
|
|---|
| 40 | }
|
|---|
| 41 | */
|
|---|
| 42 |
|
|---|
| 43 | // the last match is always a partial match except full match touches buffer end (or buffer is empty)
|
|---|
| 44 | // so mark begin of last match as new begin of buffer when filling it up in next round of do-while-loop
|
|---|
| 45 | buffer_handled_until = (curr_match != boost::cregex_iterator() ? curr_match->begin()->first : buffer_end);
|
|---|
| 46 |
|
|---|
| 47 | used_buffer = buffer_end - buffer_handled_until;
|
|---|
| 48 | if (buffer_handled_until == buffer.get())
|
|---|
| 49 | {
|
|---|
| 50 | // if current element fills the whole buffer, buffer is too small and thus doubled
|
|---|
| 51 | buffersize *= 2;
|
|---|
| 52 | std::unique_ptr<char[]> new_buffer(new char[buffersize]);
|
|---|
| 53 | std::memmove(new_buffer.get(), buffer_handled_until, used_buffer);
|
|---|
| 54 | buffer = std::move(new_buffer);
|
|---|
| 55 | }
|
|---|
| 56 | else
|
|---|
| 57 | {
|
|---|
| 58 | // move the rest of new element (buffer_handled_until) to beginning of buffer and mark it as used
|
|---|
| 59 | std::memmove(buffer.get(), buffer_handled_until, used_buffer);
|
|---|
| 60 | }
|
|---|
| 61 | } while (inputstream);
|
|---|
| 62 | }
|
|---|