commit 6cc523c83fa2461ef9f5d288559343da04e42bcf
Author: Daniel James <daniel@calamity.org.uk>
Date: Thu Apr 15 00:30:02 2010 +0100
Ignore links in comments.
diff --git a/tools/inspect/link_check.cpp b/tools/inspect/link_check.cpp
index 71d684a..bc03d63 100644
a
|
b
|
namespace fs = boost::filesystem;
|
20 | 20 | namespace |
21 | 21 | { |
22 | 22 | boost::regex html_bookmark_regex( |
23 | | "<([^\\s<>]*)\\s*[^<>]*\\s+(?:NAME|ID)\\s*=\\s*(['\"])(.*?)\\2", |
| 23 | "<([^\\s<>]*)\\s*[^<>]*\\s+(?:NAME|ID)\\s*=\\s*(['\"])(.*?)\\2" |
| 24 | "|<!--.*?-->", |
24 | 25 | boost::regbase::normal | boost::regbase::icase); |
25 | 26 | boost::regex html_url_regex( |
26 | 27 | "<([^\\s<>]*)\\s*[^<>]*\\s+(?:HREF|SRC)" // HREF or SRC |
27 | | "\\s*=\\s*(['\"])(.*?)\\2", |
| 28 | "\\s*=\\s*(['\"])(.*?)\\2" |
| 29 | "|<!--.*?-->", |
28 | 30 | boost::regbase::normal | boost::regbase::icase); |
29 | 31 | boost::regex css_url_regex( |
30 | | "(\\@import\\s*[\"']|url\\s*\\(\\s*[\"']?)([^\"')]*)", |
| 32 | "(\\@import\\s*[\"']|url\\s*\\(\\s*[\"']?)([^\"')]*)" |
| 33 | "|/\\*.*?\\*/", |
31 | 34 | boost::regbase::normal | boost::regbase::icase); |
32 | 35 | |
33 | 36 | // Regular expression for parsing URLS from: |
… |
… |
namespace boost
|
152 | 155 | // a_what[1] contains the tag iterators. |
153 | 156 | // a_what[3] contains the bookmark iterators. |
154 | 157 | |
155 | | string tag( a_what[1].first, a_what[1].second ); |
156 | | boost::algorithm::to_lower(tag); |
157 | | |
158 | | if ( tag != "meta" ) |
| 158 | if (a_what[3].matched) |
159 | 159 | { |
160 | | string bookmark( a_what[3].first, a_what[3].second ); |
161 | | bookmarks.insert( bookmark ); |
162 | | // std::cout << "******************* " << bookmark << '\n'; |
163 | | |
164 | | // w3.org recommends case-insensitive checking for duplicate bookmarks |
165 | | // since some browsers do a case-insensitive match. |
166 | | string bookmark_lowercase( bookmark ); |
167 | | boost::algorithm::to_lower(bookmark_lowercase); |
168 | | |
169 | | std::pair<bookmark_set::iterator, bool> result |
170 | | = bookmarks_lowercase.insert( bookmark_lowercase ); |
171 | | if (!result.second) |
| 160 | string tag( a_what[1].first, a_what[1].second ); |
| 161 | boost::algorithm::to_lower(tag); |
| 162 | |
| 163 | if ( tag != "meta" ) |
172 | 164 | { |
173 | | ++m_duplicate_bookmark_errors; |
174 | | int ln = std::count( contents.begin(), a_what[3].first, '\n' ) + 1; |
175 | | error( library_name, full_path, "Duplicate bookmark: " + bookmark, ln ); |
| 165 | string bookmark( a_what[3].first, a_what[3].second ); |
| 166 | bookmarks.insert( bookmark ); |
| 167 | // std::cout << "******************* " << bookmark << '\n'; |
| 168 | |
| 169 | // w3.org recommends case-insensitive checking for duplicate bookmarks |
| 170 | // since some browsers do a case-insensitive match. |
| 171 | string bookmark_lowercase( bookmark ); |
| 172 | boost::algorithm::to_lower(bookmark_lowercase); |
| 173 | |
| 174 | std::pair<bookmark_set::iterator, bool> result |
| 175 | = bookmarks_lowercase.insert( bookmark_lowercase ); |
| 176 | if (!result.second) |
| 177 | { |
| 178 | ++m_duplicate_bookmark_errors; |
| 179 | int ln = std::count( contents.begin(), a_what[3].first, '\n' ) + 1; |
| 180 | error( library_name, full_path, "Duplicate bookmark: " + bookmark, ln ); |
| 181 | } |
176 | 182 | } |
177 | 183 | } |
178 | 184 | |
… |
… |
namespace boost
|
195 | 201 | // what[0] contains the whole string iterators. |
196 | 202 | // what[1] contains the element type iterators. |
197 | 203 | // what[3] contains the URL iterators. |
| 204 | |
| 205 | if(what[3].matched) |
| 206 | { |
| 207 | string type( what[1].first, what[1].second ); |
| 208 | boost::algorithm::to_lower(type); |
198 | 209 | |
199 | | string type( what[1].first, what[1].second ); |
200 | | boost::algorithm::to_lower(type); |
201 | | |
202 | | // TODO: Complain if 'link' tags use external stylesheets. |
203 | | do_url( string( what[3].first, what[3].second ), |
204 | | library_name, full_path, no_link_errors, |
205 | | type == "a" || type == "link", contents.begin(), what[3].first ); |
| 210 | // TODO: Complain if 'link' tags use external stylesheets. |
| 211 | do_url( string( what[3].first, what[3].second ), |
| 212 | library_name, full_path, no_link_errors, |
| 213 | type == "a" || type == "link", contents.begin(), what[3].first ); |
| 214 | } |
206 | 215 | |
207 | 216 | start = what[0].second; // update search position |
208 | 217 | flags |= boost::match_prev_avail; // update flags |
… |
… |
namespace boost
|
214 | 223 | { |
215 | 224 | // what[0] contains the whole string iterators. |
216 | 225 | // what[2] contains the URL iterators. |
217 | | do_url( string( what[2].first, what[2].second ), |
218 | | library_name, full_path, no_link_errors, false, |
219 | | contents.begin(), what[3].first ); |
| 226 | |
| 227 | if(what[2].matched) |
| 228 | { |
| 229 | do_url( string( what[2].first, what[2].second ), |
| 230 | library_name, full_path, no_link_errors, false, |
| 231 | contents.begin(), what[3].first ); |
| 232 | } |
220 | 233 | |
221 | 234 | start = what[0].second; // update search position |
222 | 235 | flags |= boost::match_prev_avail; // update flags |