GNU Unifont 17.0.01
Pan-Unicode font with complete Unicode Plane 0 coverage and partial coverage of higher planes
unigenwidth.c
Go to the documentation of this file.
1/**
2 @file unigenwidth.c
3
4 @brief unigenwidth - IEEE 1003.1-2008 setup to calculate
5 wchar_t string widths
6
7 @author Paul Hardy.
8
9 @copyright Copyright (C) 2013, 2017 Paul Hardy.
10
11 All glyphs are treated as 16 pixels high, and can be
12 8, 16, 24, or 32 pixels wide (resulting in widths of
13 1, 2, 3, or 4, respectively).
14*/
15/*
16 LICENSE:
17
18 This program is free software: you can redistribute it and/or modify
19 it under the terms of the GNU General Public License as published by
20 the Free Software Foundation, either version 2 of the License, or
21 (at your option) any later version.
22
23 This program is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 GNU General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with this program. If not, see <http://www.gnu.org/licenses/>.
30*/
31
32/*
33 20 June 2017 [Paul Hardy]:
34 - Now handles glyphs that are 24 or 32 pixels wide.
35
36 8 July 2017 [Paul Hardy]:
37 - Modifies sscanf format strings to ignore second field after
38 the ":" field separator, newly added to "*combining.txt" files
39 and already present in "*.hex" files.
40
41 6 September 2025 [Paul Hardy]:
42 - Changed loc from "int" to "unsigned" for compatibility with
43 sscanf definition.
44*/
45
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49
/** Size, in bytes, of the buffer that holds one input line. */
#define MAXSTRING 256

/*
   Pikto code point range in Plane 15.  Both bounds are inclusive.
*/
/** First code point of the Pikto range. */
#define PIKTO_START 0x0F0E70
/** Last code point of the Pikto range. */
#define PIKTO_END 0x0F11EF
/** Count of code points from PIKTO_START through PIKTO_END. */
#define PIKTO_SIZE ((PIKTO_END) - (PIKTO_START) + 1)
57
58
59/**
60 @brief The main function.
61
62 @param[in] argc The count of command line arguments.
63 @param[in] argv Pointer to array of command line arguments.
64 @return This program exits with status EXIT_SUCCESS.
65*/
66int
67main (int argc, char **argv)
68{
69
70 int i; /* loop variable */
71
72 char teststring[MAXSTRING];
73 unsigned loc;
74 char *gstart;
75
76 char glyph_width[0x20000];
77 char pikto_width[PIKTO_SIZE];
78
79 FILE *infilefp;
80
81 if (argc != 3) {
82 fprintf (stderr, "\n\nUsage: %s <unifont.hex> <combining.txt>\n\n", argv[0]);
83 exit (EXIT_FAILURE);
84 }
85
86 /*
87 Read the collection of hex glyphs.
88 */
89 if ((infilefp = fopen (argv[1],"r")) == NULL) {
90 fprintf (stderr,"ERROR - hex input file %s not found.\n\n", argv[1]);
91 exit (EXIT_FAILURE);
92 }
93
94 /* Flag glyph as non-existent until found. */
95 memset (glyph_width, -1, 0x20000 * sizeof (char));
96 memset (pikto_width, -1, (PIKTO_SIZE) * sizeof (char));
97
98 teststring[MAXSTRING-1] = '\0';
99 while (fgets (teststring, MAXSTRING-1, infilefp) != NULL) {
100 sscanf (teststring, "%X:%*s", &loc);
101 if (loc < 0x20000) {
102 gstart = strchr (teststring,':') + 1;
103 /*
104 16 rows per glyph, 2 ASCII hexadecimal digits per byte,
105 so divide number of digits by 32 (shift right 5 bits).
106 */
107 glyph_width[loc] = (strlen (gstart) - 1) >> 5;
108 }
109 else if ((loc >= PIKTO_START) && (loc <= PIKTO_END)) {
110 gstart = strchr (teststring,':') + 1;
111 pikto_width[loc - PIKTO_START] = strlen (gstart) <= 34 ? 1 : 2;
112 }
113 }
114
115 fclose (infilefp);
116
117 /*
118 Now read the combining character code points. These have width of 0.
119 */
120 if ((infilefp = fopen (argv[2],"r")) == NULL) {
121 fprintf (stderr,"ERROR - combining characters file %s not found.\n\n", argv[2]);
122 exit (EXIT_FAILURE);
123 }
124
125 while (fgets (teststring, MAXSTRING-1, infilefp) != NULL) {
126 sscanf (teststring, "%X:%*s", &loc);
127 if (loc < 0x20000) glyph_width[loc] = 0;
128 }
129
130 fclose (infilefp);
131
132 /*
133 Code Points with Unusual Properties (Unicode Standard, Chapter 4).
134
135 As of Unifont 10.0.04, use the widths in the "*-nonprinting.hex"
136 files. If an application is smart enough to know how to handle
137 these special cases, it will not render the "nonprinting" glyph
138 and will treat the code point as being zero-width.
139 */
140// glyph_width[0]=0; /* NULL character */
141// for (i = 0x0001; i <= 0x001F; i++) glyph_width[i]=-1; /* Control Characters */
142// for (i = 0x007F; i <= 0x009F; i++) glyph_width[i]=-1; /* Control Characters */
143
144// glyph_width[0x034F]=0; /* combining grapheme joiner */
145// glyph_width[0x180B]=0; /* Mongolian free variation selector one */
146// glyph_width[0x180C]=0; /* Mongolian free variation selector two */
147// glyph_width[0x180D]=0; /* Mongolian free variation selector three */
148// glyph_width[0x180E]=0; /* Mongolian vowel separator */
149// glyph_width[0x200B]=0; /* zero width space */
150// glyph_width[0x200C]=0; /* zero width non-joiner */
151// glyph_width[0x200D]=0; /* zero width joiner */
152// glyph_width[0x200E]=0; /* left-to-right mark */
153// glyph_width[0x200F]=0; /* right-to-left mark */
154// glyph_width[0x202A]=0; /* left-to-right embedding */
155// glyph_width[0x202B]=0; /* right-to-left embedding */
156// glyph_width[0x202C]=0; /* pop directional formatting */
157// glyph_width[0x202D]=0; /* left-to-right override */
158// glyph_width[0x202E]=0; /* right-to-left override */
159// glyph_width[0x2060]=0; /* word joiner */
160// glyph_width[0x2061]=0; /* function application */
161// glyph_width[0x2062]=0; /* invisible times */
162// glyph_width[0x2063]=0; /* invisible separator */
163// glyph_width[0x2064]=0; /* invisible plus */
164// glyph_width[0x206A]=0; /* inhibit symmetric swapping */
165// glyph_width[0x206B]=0; /* activate symmetric swapping */
166// glyph_width[0x206C]=0; /* inhibit arabic form shaping */
167// glyph_width[0x206D]=0; /* activate arabic form shaping */
168// glyph_width[0x206E]=0; /* national digit shapes */
169// glyph_width[0x206F]=0; /* nominal digit shapes */
170
171// /* Variation Selector-1 to Variation Selector-16 */
172// for (i = 0xFE00; i <= 0xFE0F; i++) glyph_width[i] = 0;
173
174// glyph_width[0xFEFF]=0; /* zero width no-break space */
175// glyph_width[0xFFF9]=0; /* interlinear annotation anchor */
176// glyph_width[0xFFFA]=0; /* interlinear annotation separator */
177// glyph_width[0xFFFB]=0; /* interlinear annotation terminator */
178 /*
179 Let glyph widths represent 0xFFFC (object replacement character)
180 and 0xFFFD (replacement character).
181 */
182
183 /*
184 Hangul Jamo:
185
186 Leading Consonant (Choseong): leave spacing as is.
187
188 Hangul Choseong Filler (U+115F): set width to 2.
189
190 Hangul Jungseong Filler, Hangul Vowel (Jungseong), and
191 Final Consonant (Jongseong): set width to 0, because these
192 combine with the leading consonant as one composite syllabic
193 glyph. As of Unicode 5.2, the Hangul Jamo block (U+1100..U+11FF)
194 is completely filled.
195 */
196 // for (i = 0x1160; i <= 0x11FF; i++) glyph_width[i]=0; /* Vowels & Final Consonants */
197
198 /*
199 Private Use Area -- the width is undefined, but likely
200 to be 2 charcells wide either from a graphic glyph or
201 from a four-digit hexadecimal glyph representing the
202 code point. Therefore if any PUA glyph does not have
203 a non-zero width yet, assign it a default width of 2.
204 The Unicode Standard allows giving PUA characters
205 default property values; see for example The Unicode
206 Standard Version 5.0, p. 91. This same default is
207 used for higher plane PUA code points below.
208 */
209 // for (i = 0xE000; i <= 0xF8FF; i++) {
210 // if (glyph_width[i] == 0) glyph_width[i]=2;
211 // }
212
213 /*
214 <not a character>
215 */
216 for (i = 0xFDD0; i <= 0xFDEF; i++) glyph_width[i] = -1;
217 glyph_width[0xFFFE] = -1; /* Byte Order Mark */
218 glyph_width[0xFFFF] = -1; /* Byte Order Mark */
219
220 /* Surrogate Code Points */
221 for (i = 0xD800; i <= 0xDFFF; i++) glyph_width[i]=-1;
222
223 /* CJK Code Points */
224 for (i = 0x4E00; i <= 0x9FFF; i++) if (glyph_width[i] < 0) glyph_width[i] = 2;
225 for (i = 0x3400; i <= 0x4DBF; i++) if (glyph_width[i] < 0) glyph_width[i] = 2;
226 for (i = 0xF900; i <= 0xFAFF; i++) if (glyph_width[i] < 0) glyph_width[i] = 2;
227
228 /*
229 Now generate the output file.
230 */
231 printf ("/*\n");
232 printf (" wcwidth and wcswidth functions, as per IEEE 1003.1-2008\n");
233 printf (" System Interfaces, pp. 2241 and 2251.\n\n");
234 printf (" Author: Paul Hardy, 2013\n\n");
235 printf (" Copyright (c) 2013 Paul Hardy\n\n");
236 printf (" LICENSE:\n");
237 printf ("\n");
238 printf (" This program is free software: you can redistribute it and/or modify\n");
239 printf (" it under the terms of the GNU General Public License as published by\n");
240 printf (" the Free Software Foundation, either version 2 of the License, or\n");
241 printf (" (at your option) any later version.\n");
242 printf ("\n");
243 printf (" This program is distributed in the hope that it will be useful,\n");
244 printf (" but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
245 printf (" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n");
246 printf (" GNU General Public License for more details.\n");
247 printf ("\n");
248 printf (" You should have received a copy of the GNU General Public License\n");
249 printf (" along with this program. If not, see <http://www.gnu.org/licenses/>.\n");
250 printf ("*/\n\n");
251
252 printf ("#include <wchar.h>\n\n");
253 printf ("/* Definitions for Pikto CSUR Private Use Area glyphs */\n");
254 printf ("#define PIKTO_START\t0x%06X\n", PIKTO_START);
255 printf ("#define PIKTO_END\t0x%06X\n", PIKTO_END);
256 printf ("#define PIKTO_SIZE\t(PIKTO_END - PIKTO_START + 1)\n");
257 printf ("\n\n");
258 printf ("/* wcwidth -- return charcell positions of one code point */\n");
259 printf ("inline int\nwcwidth (wchar_t wc)\n{\n");
260 printf (" return (wcswidth (&wc, 1));\n");
261 printf ("}\n");
262 printf ("\n\n");
263 printf ("int\nwcswidth (const wchar_t *pwcs, size_t n)\n{\n\n");
264 printf (" int i; /* loop variable */\n");
265 printf (" unsigned codept; /* Unicode code point of current character */\n");
266 printf (" unsigned plane; /* Unicode plane, 0x00..0x10 */\n");
267 printf (" unsigned lower17; /* lower 17 bits of Unicode code point */\n");
268 printf (" unsigned lower16; /* lower 16 bits of Unicode code point */\n");
269 printf (" int lowpt, midpt, highpt; /* for binary searching in plane1zeroes[] */\n");
270 printf (" int found; /* for binary searching in plane1zeroes[] */\n");
271 printf (" int totalwidth; /* total width of string, in charcells (1 or 2/glyph) */\n");
272 printf (" int illegalchar; /* Whether or not this code point is illegal */\n");
273 putchar ('\n');
274
275 /*
276 Print the glyph_width[] array for glyphs widths in the
277 Basic Multilingual Plane (Plane 0).
278 */
279 printf (" char glyph_width[0x20000] = {");
280 for (i = 0; i < 0x10000; i++) {
281 if ((i & 0x1F) == 0)
282 printf ("\n /* U+%04X */ ", i);
283 printf ("%d,", glyph_width[i]);
284 }
285 for (i = 0x10000; i < 0x20000; i++) {
286 if ((i & 0x1F) == 0)
287 printf ("\n /* U+%06X */ ", i);
288 printf ("%d", glyph_width[i]);
289 if (i < 0x1FFFF) putchar (',');
290 }
291 printf ("\n };\n\n");
292
293 /*
294 Print the pikto_width[] array for Pikto glyph widths.
295 */
296 printf (" char pikto_width[PIKTO_SIZE] = {");
297 for (i = 0; i < PIKTO_SIZE; i++) {
298 if ((i & 0x1F) == 0)
299 printf ("\n /* U+%06X */ ", PIKTO_START + i);
300 printf ("%d", pikto_width[i]);
301 if ((PIKTO_START + i) < PIKTO_END) putchar (',');
302 }
303 printf ("\n };\n\n");
304
305 /*
306 Execution part of wcswidth.
307 */
308 printf ("\n");
309 printf (" illegalchar = totalwidth = 0;\n");
310 printf (" for (i = 0; !illegalchar && i < n; i++) {\n");
311 printf (" codept = pwcs[i];\n");
312 printf (" plane = codept >> 16;\n");
313 printf (" lower17 = codept & 0x1FFFF;\n");
314 printf (" lower16 = codept & 0xFFFF;\n");
315 printf (" if (plane < 2) { /* the most common case */\n");
316 printf (" if (glyph_width[lower17] < 0) illegalchar = 1;\n");
317 printf (" else totalwidth += glyph_width[lower17];\n");
318 printf (" }\n");
319 printf (" else { /* a higher plane or beyond Unicode range */\n");
320 printf (" if ((lower16 == 0xFFFE) || (lower16 == 0xFFFF)) {\n");
321 printf (" illegalchar = 1;\n");
322 printf (" }\n");
323 printf (" else if (plane < 4) { /* Ideographic Plane */\n");
324 printf (" totalwidth += 2; /* Default ideographic width */\n");
325 printf (" }\n");
326 printf (" else if (plane == 0x0F) { /* CSUR Private Use Area */\n");
327 printf (" if (lower16 <= 0x0E6F) { /* Kinya */\n");
328 printf (" totalwidth++; /* all Kinya syllables have width 1 */\n");
329 printf (" }\n");
330 printf (" else if (lower16 <= (PIKTO_END & 0xFFFF)) { /* Pikto */\n");
331 printf (" if (pikto_width[lower16 - (PIKTO_START & 0xFFFF)] < 0) illegalchar = 1;\n");
332 printf (" else totalwidth += pikto_width[lower16 - (PIKTO_START & 0xFFFF)];\n");
333 printf (" }\n");
334 printf (" }\n");
335 printf (" else if (plane > 0x10) {\n");
336 printf (" illegalchar = 1;\n");
337 printf (" }\n");
338 printf (" /* Other non-printing in higher planes; return -1 as per IEEE 1003.1-2008. */\n");
339 printf (" else if (/* language tags */\n");
340 printf (" codept == 0x0E0001 || (codept >= 0x0E0020 && codept <= 0x0E007F) ||\n");
341 printf (" /* variation selectors, 0x0E0100..0x0E01EF */\n");
342 printf (" (codept >= 0x0E0100 && codept <= 0x0E01EF)) {\n");
343 printf (" illegalchar = 1;\n");
344 printf (" }\n");
345 printf (" /*\n");
346 printf (" Unicode plane 0x02..0x10 printing character\n");
347 printf (" */\n");
348 printf (" else {\n");
349 printf (" illegalchar = 1; /* code is not in font */\n");
350 printf (" }\n");
351 printf ("\n");
352 printf (" }\n");
353 printf (" }\n");
354 printf (" if (illegalchar) totalwidth = -1;\n");
355 printf ("\n");
356 printf (" return (totalwidth);\n");
357 printf ("\n");
358 printf ("}\n");
359
360 exit (EXIT_SUCCESS);
361}
int main(void)
The main function.
Definition: unibdf2hex.c:46
#define MAXSTRING
Maximum input line length - 1.
Definition: unigenwidth.c:50
#define PIKTO_START
Start of Pikto code point range.
Definition: unigenwidth.c:53
#define PIKTO_SIZE
Definition: unigenwidth.c:56
#define PIKTO_END
End of Pikto code point range.
Definition: unigenwidth.c:54