Ticket #3218: StrUtils.hpp

File StrUtils.hpp, 17.1 KB (added by Yuri Goldfeld <yuri_goldfeld@…>, 13 years ago)

Part of a hand-made string library, used to compare against Boost performance-wise.

Line 
#include <algorithm>
#include <cctype>
#include <string>
2
3// Wrapper around tolower().
4inline char ToLowerCopy(char value);
5// Similar.
6inline char ToUpperCopy(char value);
7
8/* Destructively applies ToLowerCopy() to each character in given string.
9 * @param str
10 * String which to modify.
11 * @param T
12 * Any character type for which ToLowerCopy() is defined. Examples: char, wchar_t. */
13template<typename T>
14inline void ToLower(std::basic_string<T>& str);
15
16// Same, but using T* as the string type (e.g., char*).
17template<typename T>
18inline void ToLower(T* str);
19// Same, but for any string type T that supports operator[] (e.g., char*, string).
20template<typename T>
21inline void ToLower(T& str, size_t len);
22
23// Like the above, but the opposite.
24template<typename T>
25inline void ToUpper(std::basic_string<T>& str);
26template<typename T>
27inline void ToUpper(T* str);
28template<typename T>
29inline void ToUpper(T& str, size_t len);
30
31// A non-destructive version of ToLower() which returns a modified copy of the input string.
32template<typename T>
33inline std::basic_string<T> ToLowerCopy(std::basic_string<T> const& str);
34
// A non-destructive version of ToUpper() which returns a modified copy of the input string.
36template<typename T>
37inline std::basic_string<T> ToUpperCopy(std::basic_string<T> const& str);
38
39/* Returns true if and only if the two given strings are equal, ignoring case.
40 * @param s1
41 * String 1.
42 * @param s2
43 * String 2.
44 * @param T
45 * Any character type for which ToLowerCopy() is defined. Examples: char, wchar_t.
46 * @return See above. */
47template<typename T>
48inline bool EqualsI(std::basic_string<T> const& s1, std::basic_string<T> const& s2);
49
50// Same, but using T* as the string type (e.g., char*). This allows for faster comparison against C-string literals.
51template<typename T>
52inline bool EqualsI(std::basic_string<T> const& s1, T const* s2);
53template<typename T>
54inline bool EqualsI(T const* s1, std::basic_string<T> const& s2);
55template<typename T>
56inline bool EqualsI(T const* s1, T const* s2);
57// Same, but uses T[] arrays of given length.
58template<typename T>
59inline bool EqualsI(T const* s1, size_t size1, T const* s2, size_t size2);
60// Same, but both arrays are assumed to be of the same given size.
61template<typename T>
62inline bool EqualsI(T const* s1, size_t size, T const* s2);
63
64/* Returns true if and only if the needle is contained in the haystack. CASE-SENSITIVE!
65 * @param haystack
66 * String to search.
67 * @param needle
68 * String for which to search.
69 * @param T
70 * Character type. Examples: char, wchar_t. */
71template<typename T>
72inline bool Contains(std::basic_string<T> const& haystack, std::basic_string<T> const& needle);
73
74// Same, but using T* as the string type (e.g., char*). This allows for faster comparison against C-string literal.
75template<typename T>
76inline bool Contains(std::basic_string<T> const& haystack, T const* needle);
77// Same, but uses T[] arrays of given length.
78template<typename T>
79inline bool Contains(std::basic_string<T> const& haystack, T const* needle, size_t needleSize);
80
81// Same as above, but case-insensitive versions.
82template<typename T>
83inline bool ContainsI(std::basic_string<T> const& haystack, std::basic_string<T> const& needle);
84template<typename T>
85inline bool ContainsI(std::basic_string<T> const& haystack, T const* needle);
86template<typename T>
87inline bool ContainsI(std::basic_string<T> const& haystack, T const* needle, size_t needleSize);
88
89/* Returns true if and only if the first needle.size() characters of haystack exist and are equal to needle.
90 * CASE-SENSITIVE!
91 * @param haystack
92 * String to search.
93 * @param needle
94 * Prefix for which to search.
95 * @param T
96 * Character type. Examples: char, wchar_t. */
97template<typename T>
98inline bool StartsWith(std::basic_string<T> const& haystack, std::basic_string<T> const& needle);
99
100// Same, but using T* as the string type (e.g., char*). This allows for faster comparison against C-string literal.
101template<typename T>
102inline bool StartsWith(std::basic_string<T> const& haystack, T const* needle);
103// Same, but uses T[] arrays of given length.
104template<typename T>
105inline bool StartsWith(std::basic_string<T> const& haystack, T const* needle, size_t needleSize);
106
107// Same as above, but case-insensitive versions.
108template<typename T>
109inline bool StartsWithI(std::basic_string<T> const& haystack, std::basic_string<T> const& needle);
110template<typename T>
111inline bool StartsWithI(std::basic_string<T> const& haystack, T const* needle);
112template<typename T>
113inline bool StartsWithI(std::basic_string<T> const& haystack, T const* needle, size_t needleSize);
114
115// Same as above, but compares the end of the haystack instead of the beginning.
116template<typename T>
117inline bool EndsWith(std::basic_string<T> const& haystack, std::basic_string<T> const& needle);
118template<typename T>
119inline bool EndsWith(std::basic_string<T> const& haystack, T const* needle);
120template<typename T>
121inline bool EndsWith(std::basic_string<T> const& haystack, T const* needle, size_t needleSize);
122template<typename T>
123inline bool EndsWithI(std::basic_string<T> const& haystack, std::basic_string<T> const& needle);
124template<typename T>
125inline bool EndsWithI(std::basic_string<T> const& haystack, T const* needle);
126template<typename T>
127inline bool EndsWithI(std::basic_string<T> const& haystack, T const* needle, size_t needleSize);
128
129/* Replaces every occurrence of the given character with the other given character.
130 * @param haystack
131 * String to possibly modify.
132 * @param needle
133 * Character to possibly replace. If haystack is UTF-8 (char) or UTF-16 (wchar_t), this should be ASCII.
134 * @param rep
135 * Replacement character. Same UTF-8/16 guidelines as needle.
136 * @param T
137 * Character type. Examples: char, wchar_t. */
138template<typename T>
139inline void ReplaceAll(std::basic_string<T>& haystack, T const& needle, T const& rep);
140
141// Non-destructive version that returns a possibly modified copy of the haystack.
142template<typename T>
143inline std::basic_string<T> ReplaceAllCopy(std::basic_string<T> const& haystack, T const& needle, T const& rep);
144
145/* Removes every occurrence within the haystack string of any of the characters in the needleSet string.
146 * This is CASE-SENSITIVE.
147 * @param haystack
148 * The string to potentially modify.
149 * @param needleSet
150 * The set of characters each of which to remove. If haystack is in UTF-8 (char) or UTF-16 (wchar_t), each
151 * character here should be ASCII.
152 * @param T
153 * A character type. Examples: char, wchar_t. */
154template<typename T>
155inline void StripChars(std::basic_string<T>& haystack, std::basic_string<T> const& needleSet);
156
// Same, but using T* as the string type (e.g., char*). This allows for faster comparison against a C-string literal.
// Declared with a single template parameter so that this prototype matches the definition later in this file
// (an extra, non-deducible template parameter would declare a different template that is never defined).
template<typename T>
inline void StripChars(std::basic_string<T>& haystack, T const* needleSet);
160
161// Same, but uses T[] arrays of given length.
162template<typename T>
163inline void StripChars(std::basic_string<T>& haystack, T const* needleSet, size_t needleSetSize);
164
165// Non-destructive versions of the above strip functions (returning possibly modified copies of the input strings).
166template<typename T, typename NeedleString>
167inline std::basic_string<T> StripCharsCopy(std::basic_string<T> const& haystack, NeedleString const& needleSet);
168template<typename T>
169inline std::basic_string<T> StripCharsCopy(std::basic_string<T> const& haystack,
170 T const* needleSet, size_t needleSetSize);
171
172// Inline/template implementations.
173
/* Returns the lower-case form of a single narrow character.
 * @param value
 * Character to convert.
 * @return The lower-case equivalent as reported by the C library's tolower(), or value itself if there is none.
 * NOTE: only this char overload is visible in this file; other character types would be converted to char. */
inline char ToLowerCopy(char value)
{
#if 1
    // The <cctype> functions require an argument representable as unsigned char (or EOF). Plain char may be
    // negative, which would be undefined behavior, so go through unsigned char first.
    return static_cast<char>(std::tolower(static_cast<unsigned char>(value)));
#else
    // ASCII-only alternative, kept so it can be switched in when benchmarking against the C library call.
    return (('A' <= value) && (value <= 'Z')) ? static_cast<char>(value - 'A' + 'a') : value;
#endif
}
183
/* Returns the upper-case form of a single narrow character.
 * @param value
 * Character to convert.
 * @return The upper-case equivalent as reported by the C library's toupper(), or value itself if there is none.
 * NOTE: only this char overload is visible in this file; other character types would be converted to char. */
inline char ToUpperCopy(char value)
{
#if 1
    // The <cctype> functions require an argument representable as unsigned char (or EOF). Plain char may be
    // negative, which would be undefined behavior, so go through unsigned char first.
    return static_cast<char>(std::toupper(static_cast<unsigned char>(value)));
#else
    // ASCII-only alternative, kept so it can be switched in when benchmarking against the C library call.
    return (('a' <= value) && (value <= 'z')) ? static_cast<char>(value - 'a' + 'A') : value;
#endif
}
193
/* Lower-cases, in place, the first len elements of str by passing each one through ToLowerCopy().
 * @param str
 * Any object indexable with operator[] (e.g., a character pointer or a basic_string).
 * @param len
 * Number of leading elements to convert. */
template<typename T>
void ToLower(T& str, size_t len)
{
    size_t idx = 0;
    while (idx < len)
    {
        str[idx] = ToLowerCopy(str[idx]);
        ++idx;
    }
}
202
// Lower-cases every character of the given basic_string in place (delegates to the length-taking overload).
template<typename T>
void ToLower(std::basic_string<T>& str)
{
    const size_t charCount = str.size();
    ToLower(str, charCount);
}
208
// Lower-cases a NUL-terminated character array in place; its length is measured with char_traits<T>::length().
template<typename T>
void ToLower(T* str)
{
    const size_t charCount = std::char_traits<T>::length(str);
    ToLower(str, charCount);
}
214
/* Upper-cases, in place, the first len elements of str by passing each one through ToUpperCopy().
 * @param str
 * Any object indexable with operator[] (e.g., a character pointer or a basic_string).
 * @param len
 * Number of leading elements to convert. */
template<typename T>
void ToUpper(T& str, size_t len)
{
    size_t idx = 0;
    while (idx < len)
    {
        str[idx] = ToUpperCopy(str[idx]);
        ++idx;
    }
}
223
// Upper-cases every character of the given basic_string in place (delegates to the length-taking overload).
template<typename T>
void ToUpper(std::basic_string<T>& str)
{
    const size_t charCount = str.size();
    ToUpper(str, charCount);
}
229
// Upper-cases a NUL-terminated character array in place; its length is measured with char_traits<T>::length().
template<typename T>
void ToUpper(T* str)
{
    const size_t charCount = std::char_traits<T>::length(str);
    ToUpper(str, charCount);
}
235
/* Non-destructive lower-casing: copies the input and applies ToLower() to the copy.
 * @param str
 * String to read; it is not modified.
 * @return The lower-cased copy. */
template<typename T>
std::basic_string<T> ToLowerCopy(std::basic_string<T> const& str)
{
    std::basic_string<T> lowered(str);
    ToLower(lowered);
    return lowered;
}
243
/* Non-destructive upper-casing: copies the input and applies ToUpper() to the copy.
 * @param str
 * String to read; it is not modified.
 * @return The upper-cased copy. */
template<typename T>
std::basic_string<T> ToUpperCopy(std::basic_string<T> const& str)
{
    std::basic_string<T> raised(str);
    ToUpper(raised);
    return raised;
}
251
/* Case-insensitive comparison of two character arrays that are both assumed to hold at least size elements.
 * @param s1
 * First array.
 * @param size
 * Number of elements to compare from each array.
 * @param s2
 * Second array.
 * @return true if every pair of corresponding elements is equal after ToLowerCopy(); true when size is 0. */
template<typename T>
bool EqualsI(T const* s1, size_t size, T const* s2)
{
    // Walk both arrays in lock step, bailing out on the first pair that differs.
    for (size_t remaining = size; remaining != 0; --remaining, ++s1, ++s2)
    {
        const bool sameIgnoringCase = (ToLowerCopy(*s1) == ToLowerCopy(*s2));
        if (!sameIgnoringCase)
        {
            return false;
        }
    }
    return true;
}
264
// Case-insensitive comparison of two arrays with explicit sizes: arrays of different sizes are never equal.
template<typename T>
bool EqualsI(T const* s1, size_t size1, T const* s2, size_t size2)
{
    if (size1 != size2)
    {
        return false;
    }
    return EqualsI(s1, size1, s2);
}
270
// Case-insensitive equality of two basic_strings, forwarded to the (pointer, size, pointer, size) overload.
template<typename T>
bool EqualsI(std::basic_string<T> const& s1, std::basic_string<T> const& s2)
{
    T const* const chars1 = s1.data();
    T const* const chars2 = s2.data();
    return EqualsI(chars1, s1.size(), chars2, s2.size());
}
276
// Case-insensitive equality of a basic_string and a NUL-terminated character array.
template<typename T>
bool EqualsI(std::basic_string<T> const& s1, T const* s2)
{
    const size_t size2 = std::char_traits<T>::length(s2);
    return EqualsI(s1.data(), s1.size(), s2, size2);
}
// Case-insensitive equality of a NUL-terminated character array and a basic_string (arguments mirrored).
template<typename T>
bool EqualsI(T const* s1, std::basic_string<T> const& s2)
{
    // The comparison is symmetric, so put the basic_string first, exactly as the mirrored overload does.
    const size_t size1 = std::char_traits<T>::length(s1);
    return EqualsI(s2.data(), s2.size(), s1, size1);
}
// Case-insensitive equality of two NUL-terminated character arrays.
template<typename T>
bool EqualsI(T const* s1, T const* s2)
{
    const size_t size1 = std::char_traits<T>::length(s1);
    const size_t size2 = std::char_traits<T>::length(s2);
    return EqualsI(s1, size1, s2, size2);
}
292
/* Case-sensitive substring test against a character array of explicit length.
 * @param haystack
 * String to search.
 * @param needle
 * Array holding the characters to look for.
 * @param needleSize
 * Number of characters of needle to use.
 * @return true if basic_string::find() locates the needle anywhere in haystack. */
template<typename T>
bool Contains(std::basic_string<T> const& haystack, T const* needle, size_t needleSize)
{
    typedef std::basic_string<T> String;
    const typename String::size_type hit = haystack.find(needle, 0, needleSize);
    return hit != String::npos;
}
298
// Case-sensitive substring test where the needle is a NUL-terminated character array.
template<typename T>
bool Contains(std::basic_string<T> const& haystack, T const* needle)
{
    const size_t needleSize = std::char_traits<T>::length(needle);
    return Contains(haystack, needle, needleSize);
}
304
// Case-sensitive substring test where both haystack and needle are basic_strings.
template<typename T>
bool Contains(std::basic_string<T> const& haystack, std::basic_string<T> const& needle)
{
    T const* const needleChars = needle.data();
    return Contains(haystack, needleChars, needle.size());
}
310
/* Case-insensitive substring test against a character array of explicit length.
 * @param haystack
 * String to search.
 * @param needle
 * Array holding the characters to look for.
 * @param needleSize
 * Number of characters of needle to use.
 * @return true if some window of needleSize consecutive haystack characters equals the needle, ignoring case.
 * An empty needle is contained in every haystack. */
template<typename T>
bool ContainsI(std::basic_string<T> const& haystack, T const* needle, size_t needleSize)
{
    if (needleSize > haystack.size())
    {
        return false; // The needle cannot fit.
    }

    /* For performance, avoid making lower-case copies of both strings; compare in place instead.
     * Every possible starting position is tried. A single forward pass that merely resets its match counter on
     * a mismatch is not sufficient: it skips the character that caused the mismatch and therefore misses
     * matches such as "ab" inside "aab". */
    T const* const base = haystack.data();
    const size_t lastStart = haystack.size() - needleSize;
    for (size_t start = 0; start <= lastStart; ++start)
    {
        if (EqualsI(base + start, needleSize, needle))
        {
            return true;
        }
    }
    return false;
}
341
// Case-insensitive substring test where both haystack and needle are basic_strings.
template<typename T>
bool ContainsI(std::basic_string<T> const& haystack, std::basic_string<T> const& needle)
{
    T const* const needleChars = needle.data();
    return ContainsI(haystack, needleChars, needle.size());
}
// Case-insensitive substring test where the needle is a NUL-terminated character array.
template<typename T>
bool ContainsI(std::basic_string<T> const& haystack, T const* needle)
{
    const size_t needleSize = std::char_traits<T>::length(needle);
    return ContainsI(haystack, needle, needleSize);
}
352
/* Case-sensitive prefix test against a character array of explicit length.
 * @param haystack
 * String whose beginning is examined.
 * @param needle
 * Array holding the expected prefix.
 * @param needleSize
 * Number of characters of needle to use.
 * @return true if haystack has at least needleSize characters and its first needleSize characters equal the needle. */
template<typename T>
bool StartsWith(std::basic_string<T> const& haystack, T const* needle, size_t needleSize)
{
    if (haystack.size() < needleSize)
    {
        return false; // Haystack is too short to hold the prefix.
    }
    // Compare the leading needleSize characters in one call; a result of 0 means they are all equal.
    return std::char_traits<T>::compare(haystack.data(), needle, needleSize) == 0;
}
369
// Case-sensitive prefix test where both haystack and needle are basic_strings.
template<typename T>
bool StartsWith(std::basic_string<T> const& haystack, std::basic_string<T> const& needle)
{
    T const* const needleChars = needle.data();
    return StartsWith(haystack, needleChars, needle.size());
}
375
// Case-sensitive prefix test where the needle is a NUL-terminated character array (e.g., a C-string literal),
// so no basic_string has to be built for it.
template<typename T>
bool StartsWith(std::basic_string<T> const& haystack, T const* needle)
{
    const size_t needleSize = std::char_traits<T>::length(needle);
    return StartsWith(haystack, needle, needleSize);
}
382
/* Case-insensitive prefix test against a character array of explicit length.
 * @param haystack
 * String whose beginning is examined.
 * @param needle
 * Array holding the expected prefix.
 * @param needleSize
 * Number of characters of needle to use.
 * @return false if the needle is longer than haystack; otherwise the result of EqualsI() on the first
 * needleSize characters of haystack and the needle. */
template<typename T>
bool StartsWithI(std::basic_string<T> const& haystack, T const* needle, size_t needleSize)
{
    if (haystack.size() < needleSize)
    {
        return false;
    }
    return EqualsI(haystack.data(), needleSize, needle);
}
390
// Case-insensitive prefix test where both haystack and needle are basic_strings.
template<typename T>
bool StartsWithI(std::basic_string<T> const& haystack, std::basic_string<T> const& needle)
{
    T const* const needleChars = needle.data();
    return StartsWithI(haystack, needleChars, needle.size());
}
396
// Case-insensitive prefix test where the needle is a NUL-terminated character array.
template<typename T>
bool StartsWithI(std::basic_string<T> const& haystack, T const* needle)
{
    const size_t needleSize = std::char_traits<T>::length(needle);
    return StartsWithI(haystack, needle, needleSize);
}
402
/* Case-sensitive suffix test against a character array of explicit length.
 * @param haystack
 * String whose end is examined.
 * @param needle
 * Array holding the expected suffix.
 * @param needleSize
 * Number of characters of needle to use.
 * @return true if haystack has at least needleSize characters and its last needleSize characters equal the needle. */
template<typename T>
bool EndsWith(std::basic_string<T> const& haystack, T const* needle, size_t needleSize)
{
    const size_t haystackSize = haystack.size();
    if (haystackSize < needleSize)
    {
        return false; // Haystack is too short to hold the suffix.
    }
    // Point at the first character of the trailing needleSize-character window and compare it in one call.
    T const* const tail = haystack.data() + (haystackSize - needleSize);
    return std::char_traits<T>::compare(tail, needle, needleSize) == 0;
}
420
// Case-sensitive suffix test where both haystack and needle are basic_strings.
template<typename T>
bool EndsWith(std::basic_string<T> const& haystack, std::basic_string<T> const& needle)
{
    T const* const needleChars = needle.data();
    return EndsWith(haystack, needleChars, needle.size());
}
426
// Case-sensitive suffix test where the needle is a NUL-terminated character array.
template<typename T>
bool EndsWith(std::basic_string<T> const& haystack, T const* needle)
{
    const size_t needleSize = std::char_traits<T>::length(needle);
    return EndsWith(haystack, needle, needleSize);
}
432
/* Case-insensitive suffix test against a character array of explicit length.
 * @param haystack
 * String whose end is examined.
 * @param needle
 * Array holding the expected suffix.
 * @param needleSize
 * Number of characters of needle to use.
 * @return false if the needle is longer than haystack; otherwise the result of EqualsI() on the last
 * needleSize characters of haystack and the needle. */
template<typename T>
bool EndsWithI(std::basic_string<T> const& haystack, T const* needle, size_t needleSize)
{
    const size_t haystackSize = haystack.size();
    if (haystackSize < needleSize)
    {
        return false;
    }
    T const* const tail = haystack.data() + (haystackSize - needleSize);
    return EqualsI(tail, needleSize, needle);
}
440
// Case-insensitive suffix test where both haystack and needle are basic_strings.
template<typename T>
bool EndsWithI(std::basic_string<T> const& haystack, std::basic_string<T> const& needle)
{
    T const* const needleChars = needle.data();
    return EndsWithI(haystack, needleChars, needle.size());
}
446
// Case-insensitive suffix test where the needle is a NUL-terminated character array.
template<typename T>
bool EndsWithI(std::basic_string<T> const& haystack, T const* needle)
{
    const size_t needleSize = std::char_traits<T>::length(needle);
    return EndsWithI(haystack, needle, needleSize);
}
452
/* Removes from haystack, in place, every character that appears in the given set. Case-sensitive.
 * @param haystack
 * String to modify.
 * @param needleSet
 * Array holding the characters to remove.
 * @param needleSetSize
 * Number of characters of needleSet to use. */
template<typename T>
void StripChars(std::basic_string<T>& haystack, T const* needleSet, size_t needleSetSize)
{
    typedef std::basic_string<T> String;
    typedef typename String::size_type Index;

    /* This is a string-specific compaction in the spirit of std::remove_if(): surviving characters are copied
     * towards the front, and the leftover tail is cut off at the end. Nothing before the first unwanted
     * character needs to move (it would only be copied onto itself), so locate that character first. */
    const Index firstBad = haystack.find_first_of(needleSet, 0, needleSetSize);
    if (firstBad == String::npos)
    {
        return; // No character of the set occurs, so the string stays untouched.
    }

    // writeIdx: slot that receives the next kept character. readIdx: character currently being examined.
    Index writeIdx = firstBad;
    for (Index readIdx = firstBad + 1; readIdx < haystack.size(); ++readIdx)
    {
        const T current = haystack[readIdx];
        // char_traits<T>::find() returns a null pointer when current is not in the set.
        if (std::char_traits<T>::find(needleSet, needleSetSize, current))
        {
            continue; // Unwanted character: skip it.
        }
        haystack[writeIdx] = current;
        ++writeIdx;
    }

    // Everything from writeIdx onwards is stale; drop it.
    haystack.resize(writeIdx);
}
485
// Removes from haystack every character contained in the basic_string needleSet.
template<typename T>
void StripChars(std::basic_string<T>& haystack, std::basic_string<T> const& needleSet)
{
    T const* const setChars = needleSet.data();
    const size_t setSize = needleSet.size();
    StripChars(haystack, setChars, setSize);
}
491
// Removes from haystack every character contained in the NUL-terminated array needleSet.
template<typename T>
void StripChars(std::basic_string<T>& haystack, T const* needleSet)
{
    const size_t setSize = std::char_traits<T>::length(needleSet);
    StripChars(haystack, needleSet, setSize);
}
497
/* Non-destructive StripChars(): copies haystack and strips the copy.
 * @param haystack
 * String to read; it is not modified.
 * @param needleSet
 * Set of characters to remove, in any form accepted by a two-argument StripChars() overload.
 * @return The stripped copy. */
template<typename T, typename NeedleString>
std::basic_string<T> StripCharsCopy(std::basic_string<T> const& haystack, NeedleString const& needleSet)
{
    std::basic_string<T> stripped(haystack);
    StripChars(stripped, needleSet);
    return stripped;
}
505
/* Non-destructive StripChars() for a character set given as an array of explicit length.
 * @param haystack
 * String to read; it is not modified.
 * @param needleSet
 * Array holding the characters to remove.
 * @param needleSetSize
 * Number of characters of needleSet to use.
 * @return The stripped copy. */
template<typename T>
std::basic_string<T> StripCharsCopy(std::basic_string<T> const& haystack,
                                    T const* needleSet, size_t needleSetSize)
{
    std::basic_string<T> stripped(haystack);
    StripChars(stripped, needleSet, needleSetSize);
    return stripped;
}
514
/* Replaces, in place, every occurrence of one character with another.
 * @param haystack
 * String to possibly modify.
 * @param needle
 * Character to replace.
 * @param rep
 * Replacement character. */
template<typename T>
void ReplaceAll(std::basic_string<T>& haystack, T const& needle, T const& rep)
{
    /* needle and rep arrive by reference and may refer to characters inside haystack itself (e.g.,
     * ReplaceAll(s, s[0], 'x')). std::replace() also takes them by reference, so the first replacement could
     * change the value being searched for. Take private copies to keep them stable. */
    const T needleValue = needle;
    const T repValue = rep;

    // Just use the standard algorithm, as a basic_string<T> is a sequence of Ts.
    std::replace(haystack.begin(), haystack.end(), needleValue, repValue);
}

/* Non-destructive version of ReplaceAll() that returns a possibly modified copy of the haystack.
 * @param haystack
 * String to read; it is not modified.
 * @param needle
 * Character to replace.
 * @param rep
 * Replacement character.
 * @return The copy with every needle replaced by rep. */
template<typename T>
std::basic_string<T> ReplaceAllCopy(std::basic_string<T> const& haystack, T const& needle, T const& rep)
{
    std::basic_string<T> haystack2 = haystack;
    ReplaceAll(haystack2, needle, rep);
    return haystack2;
}
530