1 | /*=============================================================================
|
---|
2 | Copyright (c) 2001-2011 Joel de Guzman
|
---|
3 |
|
---|
4 | Distributed under the Boost Software License, Version 1.0. (See accompanying
|
---|
5 | file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
---|
6 | =============================================================================*/
|
---|
7 | #include <boost/config/warning_disable.hpp>
|
---|
8 | #include <boost/spirit/include/qi.hpp>
|
---|
9 | #include <boost/spirit/include/phoenix.hpp>
|
---|
10 | #include <boost/unordered_map.hpp>
|
---|
11 | #include <boost/algorithm/string/trim.hpp>
|
---|
12 | #include <boost/cstdint.hpp>
|
---|
13 | #include <boost/foreach.hpp>
|
---|
14 | #include <boost/array.hpp>
|
---|
15 | #include <boost/scoped_array.hpp>
|
---|
16 | #include <boost/range/iterator_range.hpp>
|
---|
17 |
|
---|
18 | #include <iostream>
|
---|
19 | #include <fstream>
|
---|
20 | #include <vector>
|
---|
21 | #include <algorithm>
|
---|
22 | #include <string>
|
---|
23 | #include <map>
|
---|
24 | #include <iomanip>
|
---|
25 |
|
---|
26 | // We place the data here. Each line comprises various fields
|
---|
27 | typedef std::vector<std::string> ucd_line;
|
---|
28 | typedef std::vector<ucd_line> ucd_vector;
|
---|
29 | typedef std::vector<ucd_line>::iterator ucd_iterator;
|
---|
30 |
|
---|
31 | // spirit and phoenix using declarations
|
---|
32 | using boost::spirit::qi::parse;
|
---|
33 | using boost::spirit::qi::hex;
|
---|
34 | using boost::spirit::qi::char_;
|
---|
35 | using boost::spirit::qi::eol;
|
---|
36 | using boost::spirit::qi::rule;
|
---|
37 | using boost::spirit::qi::omit;
|
---|
38 | using boost::spirit::qi::_1;
|
---|
39 | using boost::spirit::qi::_val;
|
---|
40 | using boost::phoenix::push_back;
|
---|
41 | using boost::phoenix::ref;
|
---|
42 |
|
---|
43 | // basic unsigned types
|
---|
44 | using boost::uint8_t;
|
---|
45 | using boost::uint16_t;
|
---|
46 | using boost::uint32_t;
|
---|
47 |
|
---|
48 | // a char range
|
---|
49 | struct ucd_range
|
---|
50 | {
|
---|
51 | ucd_range(uint32_t start, uint32_t finish)
|
---|
52 | : start(start), finish(finish) {}
|
---|
53 |
|
---|
54 | // we need this so we can use ucd_range as a multimap key
|
---|
55 | friend bool operator<(ucd_range const& a, ucd_range const& b)
|
---|
56 | {
|
---|
57 | return a.start < b.start;
|
---|
58 | }
|
---|
59 |
|
---|
60 | uint32_t start;
|
---|
61 | uint32_t finish;
|
---|
62 | };
|
---|
63 |
|
---|
64 | class ucd_info
|
---|
65 | {
|
---|
66 | public:
|
---|
67 |
|
---|
68 | ucd_info(char const* filename)
|
---|
69 | {
|
---|
70 | std::ifstream in(filename, std::ios_base::in);
|
---|
71 | if (!in)
|
---|
72 | {
|
---|
73 | std::cerr << "Error: Could not open input file: "
|
---|
74 | << filename << std::endl;
|
---|
75 | }
|
---|
76 | else
|
---|
77 | {
|
---|
78 | std::string data; // We will read the contents here.
|
---|
79 | in.unsetf(std::ios::skipws); // No white space skipping!
|
---|
80 | std::copy(
|
---|
81 | std::istream_iterator<char>(in),
|
---|
82 | std::istream_iterator<char>(),
|
---|
83 | std::back_inserter(data));
|
---|
84 |
|
---|
85 | typedef std::string::const_iterator iterator_type;
|
---|
86 | iterator_type f = data.begin();
|
---|
87 | iterator_type l = data.end();
|
---|
88 |
|
---|
89 | rule<iterator_type> endl = -('#' >> *(char_-eol)) >> eol;
|
---|
90 | rule<iterator_type, std::string()> field = *(char_-(';'|endl)) >> (';'|&endl);
|
---|
91 | rule<iterator_type, ucd_line()> line = +(field-endl) >> endl;
|
---|
92 | rule<iterator_type, std::vector<ucd_line>()> file = +(endl | line[push_back(_val, _1)]);
|
---|
93 |
|
---|
94 | parse(f, l, file, info);
|
---|
95 | }
|
---|
96 | }
|
---|
97 |
|
---|
98 | template <typename Array>
|
---|
99 | void collect(Array& data, int field, bool collect_properties = true) const
|
---|
100 | {
|
---|
101 | BOOST_ASSERT(!info.empty());
|
---|
102 | ucd_vector::const_iterator current = info.begin();
|
---|
103 | ucd_vector::const_iterator end = info.end();
|
---|
104 |
|
---|
105 | while (current != end)
|
---|
106 | {
|
---|
107 | std::string range = (*current)[0];
|
---|
108 | boost::trim(range);
|
---|
109 |
|
---|
110 | std::string::const_iterator f = range.begin();
|
---|
111 | std::string::const_iterator l = range.end();
|
---|
112 |
|
---|
113 | // get the code-point range
|
---|
114 | uint32_t start;
|
---|
115 | uint32_t finish;
|
---|
116 | parse(f, l, hex[ref(start) = ref(finish) = _1] >> -(".." >> hex[ref(finish) = _1]));
|
---|
117 |
|
---|
118 | // special case for UnicodeData.txt ranges:
|
---|
119 | if ((*current)[1].find("First>") != std::string::npos)
|
---|
120 | {
|
---|
121 | ++current;
|
---|
122 | BOOST_ASSERT(current != end);
|
---|
123 | BOOST_ASSERT((*current)[1].find("Last>") != std::string::npos);
|
---|
124 |
|
---|
125 | std::string range = (*current)[0];
|
---|
126 | boost::trim(range);
|
---|
127 | f = range.begin();
|
---|
128 | l = range.end();
|
---|
129 |
|
---|
130 | parse(f, l, hex[ref(finish) = _1]);
|
---|
131 | }
|
---|
132 |
|
---|
133 | std::string code;
|
---|
134 | if (field < int(current->size()))
|
---|
135 | code = (*current)[field];
|
---|
136 | boost::trim(code);
|
---|
137 | // Only collect properties we are interested in
|
---|
138 | if (collect_properties) // code for properties
|
---|
139 | {
|
---|
140 | if (!ignore_property(code))
|
---|
141 | {
|
---|
142 | for (uint32_t i = start; i <= finish; ++i)
|
---|
143 | data[i] |= map_property(code);
|
---|
144 | }
|
---|
145 | }
|
---|
146 | else // code for actual numeric values
|
---|
147 | {
|
---|
148 | for (uint32_t i = start; i <= finish; ++i)
|
---|
149 | {
|
---|
150 | if (code.empty())
|
---|
151 | {
|
---|
152 | data[i] = 0; // signal that this code maps to itself
|
---|
153 | }
|
---|
154 | else
|
---|
155 | {
|
---|
156 | f = code.begin();
|
---|
157 | l = code.end();
|
---|
158 | parse(f, l, hex, data[i]);
|
---|
159 | }
|
---|
160 | }
|
---|
161 | }
|
---|
162 | ++current;
|
---|
163 | }
|
---|
164 | }
|
---|
165 |
|
---|
166 | private:
|
---|
167 |
|
---|
168 | static bool ignore_property(std::string const& p)
|
---|
169 | {
|
---|
170 | // We don't handle all properties
|
---|
171 | std::map<std::string, int>& pm = get_property_map();
|
---|
172 | std::map<std::string, int>::iterator i = pm.find(p);
|
---|
173 | return i == pm.end();
|
---|
174 | }
|
---|
175 |
|
---|
176 | static int
|
---|
177 | map_property(std::string const& p)
|
---|
178 | {
|
---|
179 | std::map<std::string, int>& pm = get_property_map();
|
---|
180 | std::map<std::string, int>::iterator i = pm.find(p);
|
---|
181 | BOOST_ASSERT(i != pm.end());
|
---|
182 | return i->second;
|
---|
183 | }
|
---|
184 |
|
---|
185 | static std::map<std::string, int>&
|
---|
186 | get_property_map()
|
---|
187 | {
|
---|
188 | // The properties we are interested in:
|
---|
189 | static std::map<std::string, int> map;
|
---|
190 | if (map.empty())
|
---|
191 | {
|
---|
192 | // General_Category
|
---|
193 | map["Lu"] = 0;
|
---|
194 | map["Ll"] = 1;
|
---|
195 | map["Lt"] = 2;
|
---|
196 | map["Lm"] = 3;
|
---|
197 | map["Lo"] = 4;
|
---|
198 |
|
---|
199 | map["Mn"] = 8;
|
---|
200 | map["Me"] = 9;
|
---|
201 | map["Mc"] = 10;
|
---|
202 |
|
---|
203 | map["Nd"] = 16;
|
---|
204 | map["Nl"] = 17;
|
---|
205 | map["No"] = 18;
|
---|
206 |
|
---|
207 | map["Zs"] = 24;
|
---|
208 | map["Zl"] = 25;
|
---|
209 | map["Zp"] = 26;
|
---|
210 |
|
---|
211 | map["Cc"] = 32;
|
---|
212 | map["Cf"] = 33;
|
---|
213 | map["Co"] = 34;
|
---|
214 | map["Cs"] = 35;
|
---|
215 | map["Cn"] = 36;
|
---|
216 |
|
---|
217 | map["Pd"] = 40;
|
---|
218 | map["Ps"] = 41;
|
---|
219 | map["Pe"] = 42;
|
---|
220 | map["Pc"] = 43;
|
---|
221 | map["Po"] = 44;
|
---|
222 | map["Pi"] = 45;
|
---|
223 | map["Pf"] = 46;
|
---|
224 |
|
---|
225 | map["Sm"] = 48;
|
---|
226 | map["Sc"] = 49;
|
---|
227 | map["Sk"] = 50;
|
---|
228 | map["So"] = 51;
|
---|
229 |
|
---|
230 | // Derived Properties.
|
---|
231 | map["Alphabetic"] = 64;
|
---|
232 | map["Uppercase"] = 128;
|
---|
233 | map["Lowercase"] = 256;
|
---|
234 | map["White_Space"] = 512;
|
---|
235 | map["Hex_Digit"] = 1024;
|
---|
236 | map["Noncharacter_Code_Point"] = 2048;
|
---|
237 | map["Default_Ignorable_Code_Point"] = 4096;
|
---|
238 |
|
---|
239 | // Script
|
---|
240 | map["Arabic"] = 0;
|
---|
241 | map["Imperial_Aramaic"] = 1;
|
---|
242 | map["Armenian"] = 2;
|
---|
243 | map["Avestan"] = 3;
|
---|
244 | map["Balinese"] = 4;
|
---|
245 | map["Bamum"] = 5;
|
---|
246 | map["Bengali"] = 6;
|
---|
247 | map["Bopomofo"] = 7;
|
---|
248 | map["Braille"] = 8;
|
---|
249 | map["Buginese"] = 9;
|
---|
250 | map["Buhid"] = 10;
|
---|
251 | map["Canadian_Aboriginal"] = 11;
|
---|
252 | map["Carian"] = 12;
|
---|
253 | map["Cham"] = 13;
|
---|
254 | map["Cherokee"] = 14;
|
---|
255 | map["Coptic"] = 15;
|
---|
256 | map["Cypriot"] = 16;
|
---|
257 | map["Cyrillic"] = 17;
|
---|
258 | map["Devanagari"] = 18;
|
---|
259 | map["Deseret"] = 19;
|
---|
260 | map["Egyptian_Hieroglyphs"] = 20;
|
---|
261 | map["Ethiopic"] = 21;
|
---|
262 | map["Georgian"] = 22;
|
---|
263 | map["Glagolitic"] = 23;
|
---|
264 | map["Gothic"] = 24;
|
---|
265 | map["Greek"] = 25;
|
---|
266 | map["Gujarati"] = 26;
|
---|
267 | map["Gurmukhi"] = 27;
|
---|
268 | map["Hangul"] = 28;
|
---|
269 | map["Han"] = 29;
|
---|
270 | map["Hanunoo"] = 30;
|
---|
271 | map["Hebrew"] = 31;
|
---|
272 | map["Hiragana"] = 32;
|
---|
273 | map["Katakana_Or_Hiragana"] = 33;
|
---|
274 | map["Old_Italic"] = 34;
|
---|
275 | map["Javanese"] = 35;
|
---|
276 | map["Kayah_Li"] = 36;
|
---|
277 | map["Katakana"] = 37;
|
---|
278 | map["Kharoshthi"] = 38;
|
---|
279 | map["Khmer"] = 39;
|
---|
280 | map["Kannada"] = 40;
|
---|
281 | map["Kaithi"] = 41;
|
---|
282 | map["Tai_Tham"] = 42;
|
---|
283 | map["Lao"] = 43;
|
---|
284 | map["Latin"] = 44;
|
---|
285 | map["Lepcha"] = 45;
|
---|
286 | map["Limbu"] = 46;
|
---|
287 | map["Linear_B"] = 47;
|
---|
288 | map["Lisu"] = 48;
|
---|
289 | map["Lycian"] = 49;
|
---|
290 | map["Lydian"] = 50;
|
---|
291 | map["Malayalam"] = 51;
|
---|
292 | map["Mongolian"] = 52;
|
---|
293 | map["Meetei_Mayek"] = 53;
|
---|
294 | map["Myanmar"] = 54;
|
---|
295 | map["Nko"] = 55;
|
---|
296 | map["Ogham"] = 56;
|
---|
297 | map["Ol_Chiki"] = 57;
|
---|
298 | map["Old_Turkic"] = 58;
|
---|
299 | map["Oriya"] = 59;
|
---|
300 | map["Osmanya"] = 60;
|
---|
301 | map["Phags_Pa"] = 61;
|
---|
302 | map["Inscriptional_Pahlavi"] = 62;
|
---|
303 | map["Phoenician"] = 63;
|
---|
304 | map["Inscriptional_Parthian"] = 64;
|
---|
305 | map["Rejang"] = 65;
|
---|
306 | map["Runic"] = 66;
|
---|
307 | map["Samaritan"] = 67;
|
---|
308 | map["Old_South_Arabian"] = 68;
|
---|
309 | map["Saurashtra"] = 69;
|
---|
310 | map["Shavian"] = 70;
|
---|
311 | map["Sinhala"] = 71;
|
---|
312 | map["Sundanese"] = 72;
|
---|
313 | map["Syloti_Nagri"] = 73;
|
---|
314 | map["Syriac"] = 74;
|
---|
315 | map["Tagbanwa"] = 75;
|
---|
316 | map["Tai_Le"] = 76;
|
---|
317 | map["New_Tai_Lue"] = 77;
|
---|
318 | map["Tamil"] = 78;
|
---|
319 | map["Tai_Viet"] = 79;
|
---|
320 | map["Telugu"] = 80;
|
---|
321 | map["Tifinagh"] = 81;
|
---|
322 | map["Tagalog"] = 82;
|
---|
323 | map["Thaana"] = 83;
|
---|
324 | map["Thai"] = 84;
|
---|
325 | map["Tibetan"] = 85;
|
---|
326 | map["Ugaritic"] = 86;
|
---|
327 | map["Vai"] = 87;
|
---|
328 | map["Old_Persian"] = 88;
|
---|
329 | map["Cuneiform"] = 89;
|
---|
330 | map["Yi"] = 90;
|
---|
331 | map["Inherited"] = 91;
|
---|
332 | map["Common"] = 92;
|
---|
333 | map["Unknown"] = 93;
|
---|
334 | }
|
---|
335 | return map;
|
---|
336 | }
|
---|
337 |
|
---|
338 | ucd_vector info;
|
---|
339 | };
|
---|
340 |
|
---|
341 | template <typename T, uint32_t block_size_ = 256>
|
---|
342 | class ucd_table_builder
|
---|
343 | {
|
---|
344 | public:
|
---|
345 |
|
---|
346 | static uint32_t const block_size = block_size_;
|
---|
347 | static uint32_t const full_span = 0x110000;
|
---|
348 | typedef T value_type;
|
---|
349 |
|
---|
350 | ucd_table_builder() : p(new T[full_span])
|
---|
351 | {
|
---|
352 | for (uint32_t i = 0; i < full_span; ++i)
|
---|
353 | p[i] = 0;
|
---|
354 | }
|
---|
355 |
|
---|
356 | void collect(char const* filename, int field, bool collect_properties = true)
|
---|
357 | {
|
---|
358 | std::cout << "collecting " << filename << std::endl;
|
---|
359 | ucd_info info(filename);
|
---|
360 | info.collect(p, field, collect_properties);
|
---|
361 | }
|
---|
362 |
|
---|
363 | void build(std::vector<uint8_t>& stage1, std::vector<T const*>& stage2)
|
---|
364 | {
|
---|
365 | std::cout << "building tables" << std::endl;
|
---|
366 | std::map<block_ptr, std::vector<T const*> > blocks;
|
---|
367 | for (T const* i = p.get(); i < (p.get() + full_span); i += block_size)
|
---|
368 | blocks[block_ptr(i)].push_back(i);
|
---|
369 |
|
---|
370 | // Not enough bits to store the block indices.
|
---|
371 | BOOST_ASSERT(blocks.size() < (1 << (sizeof(uint8_t) * 8)));
|
---|
372 |
|
---|
373 | typedef std::pair<block_ptr, std::vector<T const*> > blocks_value_type;
|
---|
374 | std::map<T const*, std::vector<T const*> > sorted_blocks;
|
---|
375 | BOOST_FOREACH(blocks_value_type const& val, blocks)
|
---|
376 | {
|
---|
377 | sorted_blocks[val.first.p] = val.second;
|
---|
378 | }
|
---|
379 |
|
---|
380 | stage1.clear();
|
---|
381 | stage1.reserve(full_span / block_size);
|
---|
382 | stage1.resize(full_span / block_size);
|
---|
383 | stage2.clear();
|
---|
384 | stage2.reserve(blocks.size());
|
---|
385 |
|
---|
386 | typedef std::pair<T const*, std::vector<T const*> > sorted_blocks_value_type;
|
---|
387 | BOOST_FOREACH(sorted_blocks_value_type const& val, sorted_blocks)
|
---|
388 | {
|
---|
389 | stage2.push_back(val.first);
|
---|
390 | BOOST_FOREACH(T const* val2, val.second)
|
---|
391 | {
|
---|
392 | stage1[(val2 - p.get()) / block_size] = stage2.size() - 1;
|
---|
393 | }
|
---|
394 | }
|
---|
395 | }
|
---|
396 |
|
---|
397 | private:
|
---|
398 |
|
---|
399 | struct block_ptr
|
---|
400 | {
|
---|
401 | block_ptr(T const* p) : p(p) {}
|
---|
402 |
|
---|
403 | friend bool operator<(block_ptr a, block_ptr b)
|
---|
404 | {
|
---|
405 | return std::lexicographical_compare(
|
---|
406 | a.p, a.p + block_size, b.p, b.p + block_size);
|
---|
407 | }
|
---|
408 |
|
---|
409 | T const* p;
|
---|
410 | };
|
---|
411 |
|
---|
412 | boost::scoped_array<T> p;
|
---|
413 | };
|
---|
414 |
|
---|
415 | template <typename Out>
|
---|
416 | void print_tab(Out& out, int tab)
|
---|
417 | {
|
---|
418 | for (int i = 0; i < tab; ++i)
|
---|
419 | out << ' ';
|
---|
420 | }
|
---|
421 |
|
---|
422 | template <typename Out, typename C>
|
---|
423 | void print_table(Out& out, C const& c, bool trailing_comma, int width = 4, int group = 16)
|
---|
424 | {
|
---|
425 | int const tab = 4;
|
---|
426 | typename C::size_type size = c.size();
|
---|
427 | BOOST_ASSERT(size > 1);
|
---|
428 | print_tab(out, tab);
|
---|
429 | out << std::setw(width) << int(c[0]);
|
---|
430 | for (typename C::size_type i = 1; i < size; ++i)
|
---|
431 | {
|
---|
432 | out << ", ";
|
---|
433 | if ((i % group) == 0)
|
---|
434 | {
|
---|
435 | out << std::endl;
|
---|
436 | print_tab(out, tab);
|
---|
437 | }
|
---|
438 | out << std::setw(width) << int(c[i]);
|
---|
439 | }
|
---|
440 |
|
---|
441 | if (trailing_comma)
|
---|
442 | out << ", " << std::endl;
|
---|
443 | }
|
---|
444 |
|
---|
445 | template <typename Out>
|
---|
446 | void print_head(Out& out)
|
---|
447 | {
|
---|
448 | out
|
---|
449 | << "/*=============================================================================\n"
|
---|
450 | << " Copyright (c) 2001-2011 Joel de Guzman\n"
|
---|
451 | << "\n"
|
---|
452 | << " Distributed under the Boost Software License, Version 1.0. (See accompanying\n"
|
---|
453 | << " file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)\n"
|
---|
454 | << "\n"
|
---|
455 | << " AUTOGENERATED. DO NOT EDIT!!!\n"
|
---|
456 | << "==============================================================================*/\n"
|
---|
457 | << "#include <boost/cstdint.hpp>\n"
|
---|
458 | << "\n"
|
---|
459 | << "namespace boost { namespace spirit { namespace ucd { namespace detail\n"
|
---|
460 | << "{"
|
---|
461 | ;
|
---|
462 | }
|
---|
463 |
|
---|
464 | template <typename Out>
|
---|
465 | void print_tail(Out& out)
|
---|
466 | {
|
---|
467 | out
|
---|
468 | << "\n"
|
---|
469 | << "}}}} // namespace boost::spirit::unicode::detail\n"
|
---|
470 | ;
|
---|
471 | }
|
---|
472 |
|
---|
473 | char const* get_int_type_name(int size)
|
---|
474 | {
|
---|
475 | switch (size)
|
---|
476 | {
|
---|
477 | case 1: return "::boost::uint8_t";
|
---|
478 | case 2: return "::boost::uint16_t";
|
---|
479 | case 4: return "::boost::uint32_t";
|
---|
480 | case 5: return "::boost::uint64_t";
|
---|
481 | default: BOOST_ASSERT(false); return 0; // invalid size
|
---|
482 | };
|
---|
483 | }
|
---|
484 |
|
---|
485 | template <typename Out, typename Builder>
|
---|
486 | void print_file(Out& out, Builder& builder, int field_width, char const* name)
|
---|
487 | {
|
---|
488 | std::cout << "Generating " << name << " tables" << std::endl;
|
---|
489 |
|
---|
490 | uint32_t const block_size = Builder::block_size;
|
---|
491 | typedef typename Builder::value_type value_type;
|
---|
492 | print_head(out);
|
---|
493 |
|
---|
494 | std::vector<uint8_t> stage1;
|
---|
495 | std::vector<value_type const*> stage2;
|
---|
496 | builder.build(stage1, stage2);
|
---|
497 | std::cout << "Block Size: " << block_size << std::endl;
|
---|
498 | std::cout << "Total Bytes: "
|
---|
499 | << stage1.size()+(stage2.size()*block_size*sizeof(value_type))
|
---|
500 | << std::endl;
|
---|
501 |
|
---|
502 | out
|
---|
503 | << "\n"
|
---|
504 | << " static const ::boost::uint8_t " << name << "_stage1[] = {\n"
|
---|
505 | << "\n"
|
---|
506 | ;
|
---|
507 |
|
---|
508 | print_table(out, stage1, false, 3);
|
---|
509 | char const* int_name = get_int_type_name(sizeof(value_type));
|
---|
510 |
|
---|
511 | out
|
---|
512 | << "\n"
|
---|
513 | << " };"
|
---|
514 | << "\n"
|
---|
515 | << "\n"
|
---|
516 | << " static const " << int_name << ' ' << name << "_stage2[] = {"
|
---|
517 | ;
|
---|
518 |
|
---|
519 | int block_n = 0;
|
---|
520 | for (int i = 0; i < int(stage2.size()); ++i)
|
---|
521 | {
|
---|
522 | value_type const* p = stage2[i];
|
---|
523 | bool last = (i+1 == stage2.size());
|
---|
524 | out << "\n\n // block " << block_n++ << std::endl;
|
---|
525 | print_table(out,
|
---|
526 | boost::iterator_range<value_type const*>(p, p+block_size), !last, field_width);
|
---|
527 | }
|
---|
528 |
|
---|
529 | out
|
---|
530 | << "\n"
|
---|
531 | << " };"
|
---|
532 | << "\n"
|
---|
533 | ;
|
---|
534 |
|
---|
535 | out
|
---|
536 | << "\n"
|
---|
537 | << " inline " << int_name << ' ' << name << "_lookup(::boost::uint32_t ch)\n"
|
---|
538 | << " {\n"
|
---|
539 | << " ::boost::uint32_t block_offset = " << name << "_stage1[ch / " << block_size << "] * " << block_size << ";\n"
|
---|
540 | << " return " << name << "_stage2[block_offset + ch % " << block_size << "];\n"
|
---|
541 | << " }\n"
|
---|
542 | ;
|
---|
543 |
|
---|
544 | print_tail(out);
|
---|
545 | }
|
---|
546 |
|
---|
547 | int main()
|
---|
548 | {
|
---|
549 | // The category tables
|
---|
550 | {
|
---|
551 | std::ofstream out("category_table.hpp");
|
---|
552 | ucd_table_builder<uint16_t, 256> builder;
|
---|
553 | builder.collect("UnicodeData.txt", 2);
|
---|
554 | builder.collect("DerivedCoreProperties.txt", 1);
|
---|
555 | builder.collect("PropList.txt", 1);
|
---|
556 | print_file(out, builder, 4, "category");
|
---|
557 | }
|
---|
558 |
|
---|
559 | // The script tables
|
---|
560 | {
|
---|
561 | std::ofstream out("script_table.hpp");
|
---|
562 | ucd_table_builder<uint8_t, 256> builder;
|
---|
563 | builder.collect("Scripts.txt", 1);
|
---|
564 | print_file(out, builder, 3, "script");
|
---|
565 | }
|
---|
566 |
|
---|
567 | // The lowercase tables
|
---|
568 | {
|
---|
569 | std::ofstream out("lowercase_table.hpp");
|
---|
570 | ucd_table_builder<uint32_t, 256> builder;
|
---|
571 | builder.collect("UnicodeData.txt", 13, false);
|
---|
572 | print_file(out, builder, 6, "lowercase");
|
---|
573 | }
|
---|
574 |
|
---|
575 | // The uppercase tables
|
---|
576 | {
|
---|
577 | std::ofstream out("uppercase_table.hpp");
|
---|
578 | ucd_table_builder<uint32_t, 256> builder;
|
---|
579 | builder.collect("UnicodeData.txt", 12, false);
|
---|
580 | print_file(out, builder, 6, "uppercase");
|
---|
581 | }
|
---|
582 |
|
---|
583 | return 0;
|
---|
584 | }
|
---|