Ticket #11776: main.cpp

File main.cpp, 2.8 KB (added by der-storch-85@…, 7 years ago)

Workaround for correctly finding all regex matches in a stream (e.g. a file) of unknown size

Line 
#include <cstddef>
#include <cstring>
#include <iostream>
#include <memory>

#include <boost/regex.hpp>
4
5int main(int argc, char* argv[])
6{
7 std::istream& inputstream = std::cin;
8 boost::regex regex(argv[1]);
9
10 std::size_t buffersize = 4; // for demonstration, normally use something like 4096
11 std::size_t used_buffer = 0;
12 // use unique pointer instead of "plain" pointer so that there is no memory leak in case of exception
13 std::unique_ptr<char[]> buffer(new char[buffersize]);
14 do
15 {
16 inputstream.read(buffer.get() + used_buffer, buffersize - used_buffer);
17 char const* const buffer_end = buffer.get() + used_buffer + inputstream.gcount();
18 char const* buffer_handled_until = buffer.get();
19
20 // the whole following thing could be much easier by using a bidirectional input iterator here, but:
21 // do not do this because input "file" could be a named pipe, stream or similar (no backwards iterating would be possible!)
22 // so you have to manage the buffer (and release parts of it) yourself
23 // for a more efficient solution (hopefully in the near future) see <https://svn.boost.org/trac/boost/ticket/11776>
24 boost::cregex_iterator curr_match(buffer.get(), buffer_end, regex, boost::match_default | boost::match_partial);
25 // add element to set when ...
26 while (curr_match != boost::cregex_iterator() &&
27 curr_match->begin()->matched && // ... match is a full match and ...
28 (!inputstream || // ... when file is at end or ...
29 // (see next line) when match does not touch end of buffer (otherwise element could be longer, e. g. partial match)
30 !boost::regex_match(curr_match->begin()->first, buffer_end, regex, boost::match_default | boost::match_partial)))
31 {
32 std::cout << curr_match->str() << "\n";
33 ++curr_match;
34 }
35
36/* while (curr_match != boost::cregex_iterator())
37 {
38 std::cout << curr_match->str() << " " << curr_match->size() << " " << (*curr_match)[1].matched << " " << ((*curr_match)[1].second - (*curr_match)[1].first) << "\n";
39 ++curr_match;
40 }
41*/
42
43 // the last match is always a partial match except full match touches buffer end (or buffer is empty)
44 // so mark begin of last match as new begin of buffer when filling it up in next round of do-while-loop
45 buffer_handled_until = (curr_match != boost::cregex_iterator() ? curr_match->begin()->first : buffer_end);
46
47 used_buffer = buffer_end - buffer_handled_until;
48 if (buffer_handled_until == buffer.get())
49 {
50 // if current element fills the whole buffer, buffer is too small and thus doubled
51 buffersize *= 2;
52 std::unique_ptr<char[]> new_buffer(new char[buffersize]);
53 std::memmove(new_buffer.get(), buffer_handled_until, used_buffer);
54 buffer = std::move(new_buffer);
55 }
56 else
57 {
58 // move the rest of new element (buffer_handled_until) to beginning of buffer and mark it as used
59 std::memmove(buffer.get(), buffer_handled_until, used_buffer);
60 }
61 } while (inputstream);
62}