779.3.48
by Monty Taylor
Split out U8_ macros into their own header and added the IBM copyright notice to them. |
1 |
/* -*- mode: c; c-basic-offset: 2; indent-tabs-mode: nil; -*-
|
2 |
vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
|
|
3 |
||
4 |
Copyright (c) 1995-2006 International Business Machines Corporation and others
|
|
5 |
||
6 |
All rights reserved.
|
|
7 |
||
8 |
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
9 |
of this software and associated documentation files (the "Software"),
|
|
10 |
to deal in the Software without restriction, including without limitation
|
|
11 |
the rights to use, copy, modify, merge, publish, distribute, and/or sell
|
|
12 |
copies of the Software, and to permit persons
|
|
13 |
to whom the Software is furnished to do so, provided that the above
|
|
14 |
copyright notice(s) and this permission notice appear in all copies
|
|
15 |
of the Software and that both the above copyright notice(s) and this
|
|
16 |
permission notice appear in supporting documentation.
|
|
17 |
||
18 |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
|
20 |
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
|
21 |
PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL
|
|
22 |
THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM,
|
|
23 |
OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
|
|
24 |
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
25 |
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
|
|
26 |
USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
27 |
||
28 |
Except as contained in this notice, the name of a copyright holder shall not be
|
|
29 |
used in advertising or otherwise to promote the sale, use or other dealings in
|
|
30 |
this Software without prior written authorization of the copyright holder.
|
|
31 |
*/
|
|
32 |
||
33 |
#ifndef MYSTRINGS_UTF8_H
|
|
34 |
#define MYSTRINGS_UTF8_H
|
|
35 |
||
36 |
/**
|
|
37 |
* Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
|
|
38 |
* @param c 8-bit code unit (byte)
|
|
39 |
* @return TRUE or FALSE
|
|
40 |
* @stable ICU 2.4
|
|
41 |
*/
|
|
42 |
/* Non-zero (1) when bit 7 of c is clear, i.e. c lies in 0x00..0x7f; 0 otherwise.
   The argument is evaluated exactly once. */
#define U8_IS_SINGLE(c) (!((c) & 0x80))
|
|
43 |
||
44 |
/**
|
|
45 |
* Is this code unit (byte) a UTF-8 lead byte?
|
|
46 |
* @param c 8-bit code unit (byte)
|
|
47 |
* @return TRUE or FALSE
|
|
48 |
* @stable ICU 2.4
|
|
49 |
*/
|
|
50 |
/* True when the low 8 bits of c fall in 0xc0..0xfd: the byte is rebased to
   0xc0 with unsigned arithmetic, so anything below 0xc0 wraps to a huge value
   and fails the single upper-bound comparison. c is evaluated exactly once.
   NOTE(review): this range is wider than the lead bytes RFC 3629 permits
   (0xc2..0xf4); it is kept unchanged here for compatibility with callers. */
#define U8_IS_LEAD(c) (((uint8_t)(c) - 0xc0u) < 0x3eu)
|
|
51 |
||
52 |
/**
|
|
53 |
* Is this code unit (byte) a UTF-8 trail byte?
|
|
54 |
* @param c 8-bit code unit (byte)
|
|
55 |
* @return TRUE or FALSE
|
|
56 |
* @stable ICU 2.4
|
|
57 |
*/
|
|
58 |
/* True when the top two bits of c are exactly 10 (byte in 0x80..0xbf):
   after masking with 0xc0, XOR with 0x80 leaves zero only for that pattern.
   c is evaluated exactly once. */
#define U8_IS_TRAIL(c) (!(((c) & 0xc0) ^ 0x80))
|
|
59 |
||
60 |
/**
|
|
61 |
* How many code units (bytes) are used for the UTF-8 encoding
|
|
62 |
* of this Unicode code point?
|
|
63 |
* @param c 32-bit code point
|
|
64 |
* @return 1..4, or 0 if c is a surrogate or not a Unicode code point
|
|
65 |
* @stable ICU 2.4
|
|
66 |
*/
|
|
67 |
/*
 * Number of bytes needed to encode code point c in UTF-8.
 *
 *   0x0000..0x7f        -> 1
 *   0x0080..0x7ff       -> 2
 *   0x0800..0xd7ff      -> 3
 *   0xd800..0xdfff      -> 0  (surrogates are not encodable)
 *   0xe000..0xffff      -> 3
 *   0x10000..0x10ffff   -> 4
 *   above 0x10ffff      -> 0  (not a Unicode code point)
 *
 * c is converted to uint32_t before every comparison, so negative inputs
 * become large unsigned values and yield 0.
 *
 * The argument is evaluated more than once; do not pass an expression
 * with side effects.
 */
#define U8_LENGTH(c) \
  ((uint32_t)(c) < 0x80 ? 1 : \
   (uint32_t)(c) < 0x800 ? 2 : \
   (uint32_t)(c) < 0xd800 ? 3 : \
   ((uint32_t)(c) < 0xe000 || (uint32_t)(c) >= 0x110000) ? 0 : \
   (uint32_t)(c) < 0x10000 ? 3 : 4)
|
|
77 |
||
78 |
/**
|
|
79 |
* The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
|
|
80 |
* @return 4
|
|
81 |
* @stable ICU 2.4
|
|
82 |
*/
|
|
83 |
/* Upper bound on the UTF-8 byte count of a single code point in U+0000..U+10ffff. */
#define U8_MAX_LENGTH 4
|
|
84 |
||
85 |
#endif /* MYSTRINGS_UTF8_H */ |