Boost C++ Libraries: Ticket #12959: Regex class negation https://svn.boost.org/trac10/ticket/12959 <p> Pertains to boost::regex. Tested on version 1.61 </p> <p> Flags: Perl<br /> Target string: abc092efg<br /> Regex: <code>[^\W\D]+</code><br /> </p> <p> Function: regex_search<br /> </p> <p> Matches: abc092efg<br /> </p> <p> Should match: 092<br /> </p> <p> Notes<br /> </p> <p> Negative class resolution: 'Not-Not Word' AND 'Not-Not Digit'<br /> The intersection of word AND digits is digits.<br /> </p> <p> Every other regex engine does this correctly.<br /> This includes Perl, PCRE, JS, C++11, Python, etc.<br /> </p> <p> In this engine, <code>[^\W\D]</code> matches what <code>[\w\d]</code> does.<br /> <code>[^\W\D]</code> appears not to be treated as an intersection, even though the operator in<br /> a negative class is AND.<br /> </p> <p> Fwiw - this behavior is seen with all negated shorthand elements of a<br /> negative class, i.e. <code>[^\S\W]</code> matches all whitespace OR all word characters.<br /> </p> en-us Boost C++ Libraries /htdocs/site/boost.png https://svn.boost.org/trac10/ticket/12959 Trac 1.4.3 robic@… Tue, 11 Apr 2017 22:36:06 GMT <link>https://svn.boost.org/trac10/ticket/12959#comment:1 </link> <guid isPermaLink="false">https://svn.boost.org/trac10/ticket/12959#comment:1</guid> <description> <p> A solution could be to keep a vector of individual classes<br /> instead of a mask for all classes <em>cnclasses</em><br /> </p> <p> <strong>Only in a negated class, and only negative classes need to be tracked.<br /></strong> The rest remains unchanged. </p> <p> Something like this works (tested):<br /> </p> <p> &lt;boost\regex\v4\basic_regex_creator.hpp&gt;<br /> </p> <pre class="wiki">template &lt;class charT, class traits&gt; class basic_char_set { private: std::vector&lt;unsigned __int64&gt; m_NegNeg_Class; public: typedef typename std::vector&lt;uint64_t&gt;::const_iterator cNNclass_list_iterator; bool has_NegNegClasses()const { return m_NegNeg_Class.size() &gt; 0 ? 
true : false; } cNNclass_list_iterator cn_begin()const { return m_NegNeg_Class.begin(); } cNNclass_list_iterator cn_end()const { return m_NegNeg_Class.end(); } void add_negated_class(m_type m) { if ( m_negate ) { // if it's not already there, add it .. bool bDoAdd = true; for ( int i = 0; i &lt; m_NegNeg_Class.size(); i++ ) { if ( m_NegNeg_Class[i] == (unsigned __int64)m ) { bDoAdd = false; break; } } if ( bDoAdd ) m_NegNeg_Class.insert( m_NegNeg_Class.end(), (unsigned __int64)m ); } else m_negated_classes |= m; m_empty = false; } } template &lt;class charT, class traits&gt; re_syntax_base* basic_regex_creator&lt;charT, traits&gt;::append_set( const basic_char_set&lt;charT, traits&gt;&amp; char_set, mpl::false_*) { typedef typename basic_char_set&lt;uint64_t, traits&gt;::prop_list_iterator cNegNegClas_item_iterator; result-&gt;cNegNegClasses = 0; if ( char_set.is_negated() ) { result-&gt;cNegNegClasses = static_cast&lt;uint32_t&gt;(::boost::BOOST_REGEX_DETAIL_NS::distance(char_set.cn_begin(), char_set.cn_end())); } // // now extend with all the negated negative character classes: // if ( result-&gt;isnot == true ) { cNegNegClas_item_iterator cnfirst, cnlast; cnfirst = char_set.cn_begin(); cnlast = char_set.cn_end(); while(cnfirst != cnlast) { uint64_t* p = static_cast&lt;uint64_t*&gt;(this-&gt;m_pdata-&gt;m_data.extend(sizeof(uint64_t) * 1)); p[0] = *cnfirst; if(flags() &amp; regbase::icase) { // adjust class as needed: if(((p[0] &amp; m_lower_mask) == m_lower_mask) || ((p[0] &amp; m_upper_mask) == m_upper_mask)) p[0] |= m_alpha_mask; } ++cnfirst; } } } </pre><p> &lt;boost\regex\v4\perl_matcher.hpp&gt; </p> <pre class="wiki">template &lt;class iterator, class charT, class traits_type, class char_classT&gt; iterator BOOST_REGEX_CALL re_is_set_member(iterator next, iterator last, const re_set_long&lt;char_classT&gt;* set_, const regex_data&lt;charT, traits_type&gt;&amp; e, bool icase) { // try and match a single character from the neg-neg classes if ( 
set_-&gt;cNegNegClasses &amp;&amp; set_-&gt;isnot ) { for(i = 0; i &lt; set_-&gt;cNegNegClasses; i++) { uint64_t mask = *((uint64_t*)p); if(traits_inst.isctype(col, (mask_type)mask) == false) return set_-&gt;isnot ? next : ++next; p += (sizeof(uint64_t*) / sizeof(charT)); } } // the rest unchanged if( set_-&gt;cclasses != 0 ) { if(traits_inst.isctype(col, set_-&gt;cclasses) == true) return set_-&gt;isnot ? next : ++next; } if( set_-&gt;cnclasses != 0 ) { if(traits_inst.isctype(col, set_-&gt;cnclasses) == false) return set_-&gt;isnot ? next : ++next; } return set_-&gt;isnot ? ++next : next; } </pre> </description> <category>Ticket</category> </item> <item> <author>robic@…</author> <pubDate>Sat, 15 Apr 2017 17:15:54 GMT</pubDate> <title/> <link>https://svn.boost.org/trac10/ticket/12959#comment:2 </link> <guid isPermaLink="false">https://svn.boost.org/trac10/ticket/12959#comment:2</guid> <description> <p> Sorry, there were some typos in my last comment. The result, as actually used now: </p> <p> &lt;boost\regex\v4\basic_regex_creator.hpp&gt; </p> <p> </p> <pre class="wiki"> if ( m_negate ) { // if it's not already there, add it .. if ( false == (std::find(m_NegNeg_Class.begin(), m_NegNeg_Class.end(), m) != m_NegNeg_Class.end()) ) m_NegNeg_Class.insert( m_NegNeg_Class.end(), (unsigned __int64)m ); } else m_negated_classes |= m; m_empty = false; </pre><p> &lt;boost\regex\v4\perl_matcher.hpp&gt; </p> <p> </p> <pre class="wiki"> // try and match a single character from the neg-neg classes if ( set_-&gt;cNegNegClasses ) //&amp;&amp; set_-&gt;isnot ) { for(i = 0; i &lt; set_-&gt;cNegNegClasses; i++) { uint64_t mask = *((uint64_t*)p); if(traits_inst.isctype(col, (mask_type)mask) == false) return set_-&gt;isnot ? 
next : ++next; p += (sizeof(uint64_t) / sizeof(charT)); } } </pre><p> </p> </description> <category>Ticket</category> </item> <item> <dc:creator>John Maddock</dc:creator> <pubDate>Thu, 03 Aug 2017 17:06:48 GMT</pubDate> <title/> <link>https://svn.boost.org/trac10/ticket/12959#comment:3 </link> <guid isPermaLink="false">https://svn.boost.org/trac10/ticket/12959#comment:3</guid> <description> <p> Thanks, I fear you are correct :( </p> </description> <category>Ticket</category> </item> </channel> </rss>