AlterOffice
AlterOffice 3.4 SDK C/C++ API Reference
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
character.hxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 
3 
4 #ifndef INCLUDED_RTL_CHARACTER_HXX
5 #define INCLUDED_RTL_CHARACTER_HXX
6 
7 #include "sal/config.h"
8 
9 #include <cassert>
10 #include <cstddef>
11 
12 #include "sal/types.h"
13 
14 #if defined LIBO_INTERNAL_ONLY
15 #include <type_traits>
16 #endif
17 
18 namespace rtl
19 {
28 inline SAL_CONSTEXPR bool isUnicodeCodePoint(sal_uInt32 code) { return code <= 0x10FFFF; }
29 
38 inline SAL_CONSTEXPR bool isAscii(sal_uInt32 code)
39 {
40  assert(isUnicodeCodePoint(code));
41  return code <= 0x7F;
42 }
43 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
bool isAscii(char) = delete;
bool isAscii(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<(std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32)), bool>
isAscii(T code)
{
    return isAscii(static_cast<sal_uInt32>(code));
}
#endif
54 
64 inline SAL_CONSTEXPR bool isAsciiLowerCase(sal_uInt32 code)
65 {
66  assert(isUnicodeCodePoint(code));
67  return code >= 'a' && code <= 'z';
68 }
69 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
bool isAsciiLowerCase(char) = delete;
bool isAsciiLowerCase(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<(std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32)), bool>
isAsciiLowerCase(T code)
{
    return isAsciiLowerCase(static_cast<sal_uInt32>(code));
}
#endif
80 
90 inline SAL_CONSTEXPR bool isAsciiUpperCase(sal_uInt32 code)
91 {
92  assert(isUnicodeCodePoint(code));
93  return code >= 'A' && code <= 'Z';
94 }
95 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
bool isAsciiUpperCase(char) = delete;
bool isAsciiUpperCase(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<(std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32)), bool>
isAsciiUpperCase(T code)
{
    return isAsciiUpperCase(static_cast<sal_uInt32>(code));
}
#endif
106 
116 inline SAL_CONSTEXPR bool isAsciiAlpha(sal_uInt32 code)
117 {
118  assert(isUnicodeCodePoint(code));
119  return isAsciiLowerCase(code) || isAsciiUpperCase(code);
120 }
121 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
bool isAsciiAlpha(char) = delete;
bool isAsciiAlpha(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<(std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32)), bool>
isAsciiAlpha(T code)
{
    return isAsciiAlpha(static_cast<sal_uInt32>(code));
}
#endif
132 
142 inline SAL_CONSTEXPR bool isAsciiDigit(sal_uInt32 code)
143 {
144  assert(isUnicodeCodePoint(code));
145  return code >= '0' && code <= '9';
146 }
147 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
bool isAsciiDigit(char) = delete;
bool isAsciiDigit(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<(std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32)), bool>
isAsciiDigit(T code)
{
    return isAsciiDigit(static_cast<sal_uInt32>(code));
}
#endif
158 
168 inline SAL_CONSTEXPR bool isAsciiAlphanumeric(sal_uInt32 code)
169 {
170  assert(isUnicodeCodePoint(code));
171  return isAsciiDigit(code) || isAsciiAlpha(code);
172 }
173 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
bool isAsciiAlphanumeric(char) = delete;
bool isAsciiAlphanumeric(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<(std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32)), bool>
isAsciiAlphanumeric(T code)
{
    return isAsciiAlphanumeric(static_cast<sal_uInt32>(code));
}
#endif
184 
194 inline SAL_CONSTEXPR bool isAsciiCanonicHexDigit(sal_uInt32 code)
195 {
196  assert(isUnicodeCodePoint(code));
197  return isAsciiDigit(code) || (code >= 'A' && code <= 'F');
198 }
199 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
bool isAsciiCanonicHexDigit(char) = delete;
bool isAsciiCanonicHexDigit(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
isAsciiCanonicHexDigit(T code)
{
    return isAsciiCanonicHexDigit(sal_uInt32(code));
}
#endif
210 
220 inline SAL_CONSTEXPR bool isAsciiHexDigit(sal_uInt32 code)
221 {
222  assert(isUnicodeCodePoint(code));
223  return isAsciiCanonicHexDigit(code) || (code >= 'a' && code <= 'f');
224 }
225 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
bool isAsciiHexDigit(char) = delete;
bool isAsciiHexDigit(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<(std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32)), bool>
isAsciiHexDigit(T code)
{
    return isAsciiHexDigit(static_cast<sal_uInt32>(code));
}
#endif
236 
245 inline SAL_CONSTEXPR bool isAsciiOctalDigit(sal_uInt32 code)
246 {
247  assert(isUnicodeCodePoint(code));
248  return code >= '0' && code <= '7';
249 }
250 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
bool isAsciiOctalDigit(char) = delete;
bool isAsciiOctalDigit(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<(std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32)), bool>
isAsciiOctalDigit(T code)
{
    return isAsciiOctalDigit(static_cast<sal_uInt32>(code));
}
#endif
261 
271 inline SAL_CONSTEXPR bool isAsciiWhiteSpace(sal_uInt32 code)
272 {
273  assert(isUnicodeCodePoint(code));
274  return code == ' ' || code == '\f' || code == '\n' || code == '\r' || code == '\t'
275  || code == '\v';
276 }
277 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
bool isAsciiWhiteSpace(char) = delete;
bool isAsciiWhiteSpace(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<(std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32)), bool>
isAsciiWhiteSpace(T code)
{
    return isAsciiWhiteSpace(static_cast<sal_uInt32>(code));
}
#endif
288 
297 inline SAL_CONSTEXPR sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
298 {
299  assert(isUnicodeCodePoint(code));
300  return isAsciiLowerCase(code) ? code - 32 : code;
301 }
302 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
sal_uInt32 toAsciiUpperCase(char) = delete;
sal_uInt32 toAsciiUpperCase(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<(std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32)),
                                  sal_uInt32>
toAsciiUpperCase(T code)
{
    return toAsciiUpperCase(static_cast<sal_uInt32>(code));
}
#endif
314 
323 inline SAL_CONSTEXPR sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
324 {
325  assert(isUnicodeCodePoint(code));
326  return isAsciiUpperCase(code) ? code + 32 : code;
327 }
328 
#if defined LIBO_INTERNAL_ONLY
// Calls with a plain char or signed char argument are rejected at compile time.
sal_uInt32 toAsciiLowerCase(char) = delete;
sal_uInt32 toAsciiLowerCase(signed char) = delete;

// Any other integral type that is no wider than sal_uInt32 is converted to
// sal_uInt32 and forwarded to the sal_uInt32 overload.
template <typename T>
inline constexpr std::enable_if_t<(std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32)),
                                  sal_uInt32>
toAsciiLowerCase(T code)
{
    return toAsciiLowerCase(static_cast<sal_uInt32>(code));
}
#endif
340 
353 inline SAL_CONSTEXPR sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
354 {
355  assert(isUnicodeCodePoint(code1));
356  assert(isUnicodeCodePoint(code2));
357  return static_cast<sal_Int32>(toAsciiLowerCase(code1))
358  - static_cast<sal_Int32>(toAsciiLowerCase(code2));
359 }
360 
362 namespace detail
363 {
364 sal_uInt32 const surrogatesHighFirst = 0xD800;
365 sal_uInt32 const surrogatesHighLast = 0xDBFF;
366 sal_uInt32 const surrogatesLowFirst = 0xDC00;
367 sal_uInt32 const surrogatesLowLast = 0xDFFF;
368 }
370 
379 inline SAL_CONSTEXPR bool isSurrogate(sal_uInt32 code)
380 {
381  assert(isUnicodeCodePoint(code));
382  return code >= detail::surrogatesHighFirst && code <= detail::surrogatesLowLast;
383 }
384 
393 inline SAL_CONSTEXPR bool isHighSurrogate(sal_uInt32 code)
394 {
395  assert(isUnicodeCodePoint(code));
396  return code >= detail::surrogatesHighFirst && code <= detail::surrogatesHighLast;
397 }
398 
407 inline SAL_CONSTEXPR bool isLowSurrogate(sal_uInt32 code)
408 {
409  assert(isUnicodeCodePoint(code));
410  return code >= detail::surrogatesLowFirst && code <= detail::surrogatesLowLast;
411 }
412 
422 {
423  assert(isUnicodeCodePoint(code));
424  assert(code >= 0x10000);
425  return static_cast<sal_Unicode>(((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
426 }
427 
437 {
438  assert(isUnicodeCodePoint(code));
439  assert(code >= 0x10000);
440  return static_cast<sal_Unicode>(((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
441 }
442 
453 inline SAL_CONSTEXPR sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
454 {
455  assert(isHighSurrogate(high));
456  assert(isLowSurrogate(low));
457  return ((high - detail::surrogatesHighFirst) << 10) + (low - detail::surrogatesLowFirst)
458  + 0x10000;
459 }
460 
473 inline SAL_CONSTEXPR std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode* output)
474 {
475  assert(isUnicodeCodePoint(code));
476  assert(output != NULL);
477  if (code < 0x10000)
478  {
479  output[0] = code;
480  return 1;
481  }
482  else
483  {
484  output[0] = getHighSurrogate(code);
485  output[1] = getLowSurrogate(code);
486  return 2;
487  }
488 }
489 
498 inline SAL_CONSTEXPR bool isUnicodeScalarValue(sal_uInt32 code)
499 {
500  return isUnicodeCodePoint(code) && !isSurrogate(code);
501 }
502 }
503 
504 #endif
505 
506 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
SAL_CONSTEXPR bool isAscii(sal_uInt32 code)
Check for ASCII character.
Definition: character.hxx:38
SAL_CONSTEXPR bool isAsciiAlpha(sal_uInt32 code)
Check for ASCII alphabetic character.
Definition: character.hxx:116
SAL_CONSTEXPR bool isUnicodeCodePoint(sal_uInt32 code)
Check for Unicode code point.
Definition: character.hxx:28
SAL_CONSTEXPR sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
Compare two characters ignoring ASCII case.
Definition: character.hxx:353
SAL_CONSTEXPR bool isUnicodeScalarValue(sal_uInt32 code)
Check for Unicode scalar value.
Definition: character.hxx:498
SAL_CONSTEXPR bool isAsciiUpperCase(sal_uInt32 code)
Check for ASCII upper case character.
Definition: character.hxx:90
sal_uInt16 sal_Unicode
Definition: types.h:103
SAL_CONSTEXPR std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode *output)
Split a Unicode code point into UTF-16 code units.
Definition: character.hxx:473
SAL_CONSTEXPR bool isSurrogate(sal_uInt32 code)
Check for surrogate.
Definition: character.hxx:379
SAL_CONSTEXPR bool isAsciiDigit(sal_uInt32 code)
Check for ASCII digit character.
Definition: character.hxx:142
SAL_CONSTEXPR sal_Unicode getLowSurrogate(sal_uInt32 code)
Get low surrogate half of a non-BMP Unicode code point.
Definition: character.hxx:436
SAL_CONSTEXPR bool isAsciiLowerCase(sal_uInt32 code)
Check for ASCII lower case character.
Definition: character.hxx:64
SAL_CONSTEXPR bool isAsciiCanonicHexDigit(sal_uInt32 code)
Check for ASCII canonic hexadecimal digit character.
Definition: character.hxx:194
SAL_CONSTEXPR sal_Unicode getHighSurrogate(sal_uInt32 code)
Get high surrogate half of a non-BMP Unicode code point.
Definition: character.hxx:421
#define SAL_CONSTEXPR
C++11 "constexpr" feature.
Definition: types.h:384
SAL_CONSTEXPR sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
Combine surrogates to form a code point.
Definition: character.hxx:453
SAL_CONSTEXPR bool isLowSurrogate(sal_uInt32 code)
Check for low surrogate.
Definition: character.hxx:407
SAL_CONSTEXPR bool isAsciiOctalDigit(sal_uInt32 code)
Check for ASCII octal digit character.
Definition: character.hxx:245
SAL_CONSTEXPR bool isAsciiWhiteSpace(sal_uInt32 code)
Check for ASCII white space character.
Definition: character.hxx:271
SAL_CONSTEXPR sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
Convert a character, if ASCII, to upper case.
Definition: character.hxx:297
SAL_CONSTEXPR bool isHighSurrogate(sal_uInt32 code)
Check for high surrogate.
Definition: character.hxx:393
SAL_CONSTEXPR bool isAsciiAlphanumeric(sal_uInt32 code)
Check for ASCII alphanumeric character.
Definition: character.hxx:168
SAL_CONSTEXPR sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
Convert a character, if ASCII, to lower case.
Definition: character.hxx:323
SAL_CONSTEXPR bool isAsciiHexDigit(sal_uInt32 code)
Check for ASCII hexadecimal digit character.
Definition: character.hxx:220