Ticket #12619: boostbug.cpp

File boostbug.cpp, 2.5 KB (added by Dr. Robert van Engelen <engelen@…>, 6 years ago)

Small example to demonstrate the issue

Line 
1
// Boost.Regex match_partial implementation is incorrect
//
// Submitted by Dr. Robert van Engelen, engelen@genivia.com
// Related Ticket: #11776
// Affects: RE/flex project https://sourceforge.net/projects/re-flex/
//
// Problem: Using iterators with the match_partial flag to search incremental
// input (such as interactive input), or to search for long patterns in files,
// is impossible when greedy repetition patterns are used (*, +, etc.).
// Greedy repetitions may behave as lazy repetitions (*?, +?, etc.).
12
#include <cctype>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

#include <boost/regex.hpp>
19
20void search(std::istream& is, const boost::regex& e, unsigned n)
21{
22 // buffer we'll be searching in:
23 char buf[4096];
24 // current position
25 const char *cur = buf;
26 // end of filled buffer text
27 char *pos = buf;
28 // flag to indicate whether there is more input to come:
29 bool have_more = true;
30 // flags
31 boost::match_flag_type flg = boost::match_default | boost::regex_constants::match_not_null | boost::match_partial;
32 // iterator end
33 boost::cregex_iterator b;
34
35 while (have_more)
36 {
37 // fill more of the buffer by adding n chars
38 is.read(pos, n);
39 unsigned read = is.gcount();
40 // check to see if we've run out of text:
41 have_more = read == n;
42 // update the end of filled buffer text
43 pos += read;
44 // and then iterate:
45 boost::cregex_iterator a(
46 cur,
47 pos,
48 e,
49 flg);
50 // while more matches
51 while (a != b)
52 {
53 if ((*a)[0].matched == false)
54 {
55 // Partial match
56 break;
57 }
58 else if ((*a)[0].second < pos || !have_more) // as per documentation
59 {
60 // full match:
61 std::string full((*a)[0].first, (*a)[0].second - (*a)[0].first);
62 std::cout << "matched: " << full << std::endl;
63 size_t cap, groups = (*a).size();
64 for (cap = 1; cap < groups && !(*a)[cap].matched; ++cap)
65 continue;
66 if (cap < groups)
67 std::cout << "group = " << cap << std::endl;
68 // save next current position
69 cur = (*a)[0].second;
70 }
71
72 // move to next match:
73 ++a;
74 }
75 }
76}
77
78int main()
79{
80 boost::regex e("a.*c");
81 std::stringstream s("abc abc");
82
83 // read blocks of 100 chars at a time, OK because match fits in 100 bytes
84 std::cout << "\nCorrectly ";
85 search(s, e, 100);
86
87 // read by one char at a time (interactive), pattern behaves as a lazy .*?
88 std::cout << "\nIncorrectly ";
89 s.clear();
90 s.seekg(0);
91 search(s, e, 1);
92
93 return 0;
94}