2 * Byte sequences for various UTF-8 entities
4 * Wireshark - Network traffic analyzer
5 * By Gerald Combs <gerald@wireshark.org>
6 * Copyright 1998 Gerald Combs
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
/*
 * Include guard: everything up to the matching #endif is skipped when this
 * header has already been included in the current translation unit.
 *
 * NOTE(review): identifiers containing a double underscore are reserved for
 * the implementation in C. Renaming the guard (e.g. to UTF8_ENTITIES_H) would
 * be cleaner, but must be checked against any code that tests this name.
 */
24 #ifndef __UTF8_ENTITIES_H__
25 #define __UTF8_ENTITIES_H__
28 * Sequences can be found at
29 * http://www.fileformat.info/info/unicode/
30 * http://www.utf8-chartable.de/
33 * Please be conservative when adding code points below. While many modern
34 * systems default to UTF-8 and handle it well, some do not. The Windows
35 * console is a notable example. As a general rule you probably shouldn't
36 * stray too far from code page 437 or WGL4:
37 * https://en.wikipedia.org/wiki/Code_page_437
38 * https://en.wikipedia.org/wiki/Windows_Glyph_List_4
40 * Hopefully we can dispense with the sequences below and simply encode our
41 * files as UTF-8 at some point. For example gcc has supported UTF-8 since
42 * at least 3.4. Visual C++ on the other hand is much more problematic.
43 * Visual Studio 2015 and later support /source-charset:utf-8, but prior
44 * versions appear to require a UTF-8 BOM.
/*
 * Two-byte UTF-8 sequences. Each macro expands to a string literal, so it can
 * be pasted next to other literals ("25" UTF8_DEGREE_SIGN "C").
 * The trailing comment on each line is the code point as decimal / hexadecimal.
 */
47 #define UTF8_DEGREE_SIGN "\xc2\xb0" /* 176 / 0xb0 */
48 #define UTF8_SUPERSCRIPT_TWO "\xc2\xb2" /* 178 / 0xb2 */
49 #define UTF8_MICRO_SIGN "\xc2\xb5" /* 181 / 0xb5 */
50 #define UTF8_MIDDLE_DOT "\xc2\xb7" /* 183 / 0xb7 */
/*
 * Three-byte UTF-8 sequences for punctuation in the U+20xx range.
 * The trailing comment on each line is the code point as decimal / hexadecimal.
 */
52 #define UTF8_BULLET "\xe2\x80\xa2" /* 8226 / 0x2022 */
53 #define UTF8_EM_DASH "\xe2\x80\x94" /* 8212 / 0x2014 */
54 #define UTF8_HORIZONTAL_ELLIPSIS "\xe2\x80\xa6" /* 8230 / 0x2026 */
/*
 * Three-byte UTF-8 sequences for arrows in the U+219x range.
 * The trailing comment on each line is the code point as decimal / hexadecimal.
 */
56 #define UTF8_LEFTWARDS_ARROW "\xe2\x86\x90" /* 8592 / 0x2190 */
57 #define UTF8_RIGHTWARDS_ARROW "\xe2\x86\x92" /* 8594 / 0x2192 */
58 #define UTF8_LEFT_RIGHT_ARROW "\xe2\x86\x94" /* 8596 / 0x2194 */
60 /* macOS command key */
61 #define UTF8_PLACE_OF_INTEREST_SIGN "\xe2\x8c\x98" /* 8984 / 0x2318 */
/*
 * A printable three-byte symbol that stands for NUL; the literal itself
 * contains no zero byte other than its terminator.
 */
63 #define UTF8_SYMBOL_FOR_NULL "\xe2\x90\x80" /* 9216 / 0x2400 */
/* Check mark, ballot X and long arrow; comments are decimal / hexadecimal code points. */
65 #define UTF8_CHECK_MARK "\xe2\x9c\x93" /* 10003 / 0x2713 */
66 #define UTF8_BALLOT_X "\xe2\x9c\x97" /* 10007 / 0x2717 */
67 #define UTF8_LONG_RIGHTWARDS_ARROW "\xe2\x9f\xb6" /* 10230 / 0x27f6 */
/*
 * U+FEFF encoded as UTF-8 (bytes EF BB BF). UTF8_BOM is an alias for the
 * same three-byte sequence.
 */
69 #define UTF8_ZERO_WIDTH_NO_BREAK_SPACE "\xef\xbb\xbf" /* 65279 / 0xfeff */
70 #define UTF8_BOM UTF8_ZERO_WIDTH_NO_BREAK_SPACE
72 #endif /* __UTF8_ENTITIES_H__ */
80 * indent-tabs-mode: nil
83 * ex: set shiftwidth=4 tabstop=8 expandtab
84 * :indentSize=4:tabSize=8:noTabs=true: