commit 6cc523c83fa2461ef9f5d288559343da04e42bcf
Author: Daniel James <daniel@calamity.org.uk>
Date: Thu Apr 15 00:30:02 2010 +0100
Ignore links in comments.
diff --git a/tools/inspect/link_check.cpp b/tools/inspect/link_check.cpp
index 71d684a..bc03d63 100644
|
a
|
b
|
namespace fs = boost::filesystem;
|
| 20 | 20 | namespace |
| 21 | 21 | { |
| 22 | 22 | boost::regex html_bookmark_regex( |
| 23 | | "<([^\\s<>]*)\\s*[^<>]*\\s+(?:NAME|ID)\\s*=\\s*(['\"])(.*?)\\2", |
| | 23 | "<([^\\s<>]*)\\s*[^<>]*\\s+(?:NAME|ID)\\s*=\\s*(['\"])(.*?)\\2" |
| | 24 | "|<!--.*?-->", |
| 24 | 25 | boost::regbase::normal | boost::regbase::icase); |
| 25 | 26 | boost::regex html_url_regex( |
| 26 | 27 | "<([^\\s<>]*)\\s*[^<>]*\\s+(?:HREF|SRC)" // HREF or SRC |
| 27 | | "\\s*=\\s*(['\"])(.*?)\\2", |
| | 28 | "\\s*=\\s*(['\"])(.*?)\\2" |
| | 29 | "|<!--.*?-->", |
| 28 | 30 | boost::regbase::normal | boost::regbase::icase); |
| 29 | 31 | boost::regex css_url_regex( |
| 30 | | "(\\@import\\s*[\"']|url\\s*\\(\\s*[\"']?)([^\"')]*)", |
| | 32 | "(\\@import\\s*[\"']|url\\s*\\(\\s*[\"']?)([^\"')]*)" |
| | 33 | "|/\\*.*?\\*/", |
| 31 | 34 | boost::regbase::normal | boost::regbase::icase); |
| 32 | 35 | |
| 33 | 36 | // Regular expression for parsing URLS from: |
| … |
… |
namespace boost
|
| 152 | 155 | // a_what[1] contains the tag iterators. |
| 153 | 156 | // a_what[3] contains the bookmark iterators. |
| 154 | 157 | |
| 155 | | string tag( a_what[1].first, a_what[1].second ); |
| 156 | | boost::algorithm::to_lower(tag); |
| 157 | | |
| 158 | | if ( tag != "meta" ) |
| | 158 | if (a_what[3].matched) |
| 159 | 159 | { |
| 160 | | string bookmark( a_what[3].first, a_what[3].second ); |
| 161 | | bookmarks.insert( bookmark ); |
| 162 | | // std::cout << "******************* " << bookmark << '\n'; |
| 163 | | |
| 164 | | // w3.org recommends case-insensitive checking for duplicate bookmarks |
| 165 | | // since some browsers do a case-insensitive match. |
| 166 | | string bookmark_lowercase( bookmark ); |
| 167 | | boost::algorithm::to_lower(bookmark_lowercase); |
| 168 | | |
| 169 | | std::pair<bookmark_set::iterator, bool> result |
| 170 | | = bookmarks_lowercase.insert( bookmark_lowercase ); |
| 171 | | if (!result.second) |
| | 160 | string tag( a_what[1].first, a_what[1].second ); |
| | 161 | boost::algorithm::to_lower(tag); |
| | 162 | |
| | 163 | if ( tag != "meta" ) |
| 172 | 164 | { |
| 173 | | ++m_duplicate_bookmark_errors; |
| 174 | | int ln = std::count( contents.begin(), a_what[3].first, '\n' ) + 1; |
| 175 | | error( library_name, full_path, "Duplicate bookmark: " + bookmark, ln ); |
| | 165 | string bookmark( a_what[3].first, a_what[3].second ); |
| | 166 | bookmarks.insert( bookmark ); |
| | 167 | // std::cout << "******************* " << bookmark << '\n'; |
| | 168 | |
| | 169 | // w3.org recommends case-insensitive checking for duplicate bookmarks |
| | 170 | // since some browsers do a case-insensitive match. |
| | 171 | string bookmark_lowercase( bookmark ); |
| | 172 | boost::algorithm::to_lower(bookmark_lowercase); |
| | 173 | |
| | 174 | std::pair<bookmark_set::iterator, bool> result |
| | 175 | = bookmarks_lowercase.insert( bookmark_lowercase ); |
| | 176 | if (!result.second) |
| | 177 | { |
| | 178 | ++m_duplicate_bookmark_errors; |
| | 179 | int ln = std::count( contents.begin(), a_what[3].first, '\n' ) + 1; |
| | 180 | error( library_name, full_path, "Duplicate bookmark: " + bookmark, ln ); |
| | 181 | } |
| 176 | 182 | } |
| 177 | 183 | } |
| 178 | 184 | |
| … |
… |
namespace boost
|
| 195 | 201 | // what[0] contains the whole string iterators. |
| 196 | 202 | // what[1] contains the element type iterators. |
| 197 | 203 | // what[3] contains the URL iterators. |
| | 204 | |
| | 205 | if(what[3].matched) |
| | 206 | { |
| | 207 | string type( what[1].first, what[1].second ); |
| | 208 | boost::algorithm::to_lower(type); |
| 198 | 209 | |
| 199 | | string type( what[1].first, what[1].second ); |
| 200 | | boost::algorithm::to_lower(type); |
| 201 | | |
| 202 | | // TODO: Complain if 'link' tags use external stylesheets. |
| 203 | | do_url( string( what[3].first, what[3].second ), |
| 204 | | library_name, full_path, no_link_errors, |
| 205 | | type == "a" || type == "link", contents.begin(), what[3].first ); |
| | 210 | // TODO: Complain if 'link' tags use external stylesheets. |
| | 211 | do_url( string( what[3].first, what[3].second ), |
| | 212 | library_name, full_path, no_link_errors, |
| | 213 | type == "a" || type == "link", contents.begin(), what[3].first ); |
| | 214 | } |
| 206 | 215 | |
| 207 | 216 | start = what[0].second; // update search position |
| 208 | 217 | flags |= boost::match_prev_avail; // update flags |
| … |
… |
namespace boost
|
| 214 | 223 | { |
| 215 | 224 | // what[0] contains the whole string iterators. |
| 216 | 225 | // what[2] contains the URL iterators. |
| 217 | | do_url( string( what[2].first, what[2].second ), |
| 218 | | library_name, full_path, no_link_errors, false, |
| 219 | | contents.begin(), what[3].first ); |
| | 226 | |
| | 227 | if(what[2].matched) |
| | 228 | { |
| | 229 | do_url( string( what[2].first, what[2].second ), |
| | 230 | library_name, full_path, no_link_errors, false, |
| | 231 | contents.begin(), what[3].first ); |
| | 232 | } |
| 220 | 233 | |
| 221 | 234 | start = what[0].second; // update search position |
| 222 | 235 | flags |= boost::match_prev_avail; // update flags |