// Boost.Regex partial_match implementation is incorrect // // Submitted by Dr. Robert van Engelen, engelen@genivia.com // Related Ticket: #11776 // Affects: RE/flex project https://sourceforge.net/projects/re-flex/ // // Problem: Using iterators with partial_match to search incremental input // (such as interactive input) and searching long patterns in files is // impossible when greedy repetition patterns are used (*, +, etc). // Repetitions may behave as lazy repetitions (*?, +?, etc). #include #include #include #include #include #include void search(std::istream& is, const boost::regex& e, unsigned n) { // buffer we'll be searching in: char buf[4096]; // current position const char *cur = buf; // end of filled buffer text char *pos = buf; // flag to indicate whether there is more input to come: bool have_more = true; // flags boost::match_flag_type flg = boost::match_default | boost::regex_constants::match_not_null | boost::match_partial; // iterator end boost::cregex_iterator b; while (have_more) { // fill more of the buffer by adding n chars is.read(pos, n); unsigned read = is.gcount(); // check to see if we've run out of text: have_more = read == n; // update the end of filled buffer text pos += read; // and then iterate: boost::cregex_iterator a( cur, pos, e, flg); // while more matches while (a != b) { if ((*a)[0].matched == false) { // Partial match break; } else if ((*a)[0].second < pos || !have_more) // as per documentation { // full match: std::string full((*a)[0].first, (*a)[0].second - (*a)[0].first); std::cout << "matched: " << full << std::endl; size_t cap, groups = (*a).size(); for (cap = 1; cap < groups && !(*a)[cap].matched; ++cap) continue; if (cap < groups) std::cout << "group = " << cap << std::endl; // save next current position cur = (*a)[0].second; } // move to next match: ++a; } } } int main() { boost::regex e("a.*c"); std::stringstream s("abc abc"); // read blocks of 100 chars at a time, OK because match fits in 100 bytes std::cout << "\nCorrectly "; search(s, e, 100); // read by one char at a time (interactive), pattern behaves as a lazy .*? std::cout << "\nIncorrectly "; s.clear(); s.seekg(0); search(s, e, 1); return 0; }