GNU Unifont 17.0.02
Pan-Unicode font with complete Unicode Plane 0 coverage and partial coverage of higher planes
unigen-hangul.c
Go to the documentation of this file.
1/**
2 @file unigen-hangul.c
3
4 @brief Generate arbitrary hangul syllables.
5
6 Input is a Unifont .hex file such as the "hangul-base.hex" file that
7 is included in the Unifont package.
8
9 The default program parameters will generate the Unicode
10 Hangul Syllables range of U+AC00..U+D7A3. The syllables
11 will appear in this order:
12
13 For each modern choseong {
14 For each modern jungseong {
15 Output syllable of choseong and jungseong
16 For each modern jongseong {
17 Output syllable of choseong + jungseong + jongseong
18 }
19 }
20 }
21
22 By starting the jongseong code point at one before the first
23 valid jongseong, the first inner loop iteration will add a
24 blank glyph for the jongseong portion of the syllable, so
25 only the current choseong and jungseong will be output first.
26
27 @author Paul Hardy
28
29 @copyright Copyright © 2023 Paul Hardy
30*/
31/*
32 LICENSE:
33
34 This program is free software: you can redistribute it and/or modify
35 it under the terms of the GNU General Public License as published by
36 the Free Software Foundation, either version 2 of the License, or
37 (at your option) any later version.
38
39 This program is distributed in the hope that it will be useful,
40 but WITHOUT ANY WARRANTY; without even the implied warranty of
41 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
42 GNU General Public License for more details.
43
44 You should have received a copy of the GNU General Public License
45 along with this program. If not, see <http://www.gnu.org/licenses/>.
46*/
47
48#include <stdio.h>
49#include <stdlib.h>
50#include "hangul.h"
51
52// #define DEBUG
53
54
/*
   Run-time settings for syllable generation.

   Member order is significant: main() initializes this structure
   positionally, so new members must only be appended.
*/
struct PARAMS {
   /* First Unicode code point assigned to the generated output. */
   unsigned starting_codept;

   /* Choseong (initial consonant) range, as code points. */
   unsigned cho_start;
   unsigned cho_end;

   /* Jungseong (medial vowel) range, as code points. */
   unsigned jung_start;
   unsigned jung_end;

   /* Jongseong (final consonant) range, as code points. */
   unsigned jong_start;
   unsigned jong_end;

   /* Stream the base glyphs are read from. */
   FILE *infp;

   /* Stream the combined glyphs are written to. */
   FILE *outfp;
};
63
64
65/**
66 @brief Program entry point.
67*/
68int
69main (int argc, char *argv[]) {
70
71 int i; /* loop variable */
72 unsigned codept;
73 unsigned max_codept;
74 unsigned glyph[MAX_GLYPHS][16];
75 unsigned tmp_glyph [16]; /* To build one combined glyph at a time. */
76 int cho, jung, jong; /* The 3 components in a Hangul syllable. */
77
78 /// Default parameters for Hangul syllable generation.
79 struct PARAMS params = { 0xAC00, /* Starting output Unicode code point */
80 0x1100, /* First modern choseong */
81 0x1112, /* Last modern choseong */
82 0x1161, /* First modern jungseong */
83 0x1175, /* Last modern jungseong */
84 0x11A7, /* One before first modern jongseong */
85 0x11C2, /* Last modern jongseong */
86 stdin, /* Default input file pointer */
87 stdout /* Default output file pointer */
88 };
89
90 void parse_args (int argc, char *argv[], struct PARAMS *params);
91
92 unsigned hangul_read_base16 (FILE *infp, unsigned glyph[][16]);
93
94 void print_glyph_hex (FILE *fp, unsigned codept, unsigned *this_glyph);
95
96 void combined_jamo (unsigned glyph [MAX_GLYPHS][16],
97 unsigned cho, unsigned jung, unsigned jong,
98 unsigned *combined_glyph);
99
100
101 if (argc > 1) {
102 parse_args (argc, argv, &params);
103
104#ifdef DEBUG
105 fprintf (stderr,
106 "Range: (U+%04X, U+%04X, U+%04X) to (U+%04X, U+%04X, U+%04X)\n",
107 params.cho_start, params.jung_start, params.jong_start,
108 params.cho_end, params.jung_end, params.jong_end);
109#endif
110 }
111
112 /*
113 Initialize glyph array to all zeroes.
114 */
115 for (codept = 0; codept < MAX_GLYPHS; codept++) {
116 for (i = 0; i < 16; i++) glyph[codept][i] = 0x0000;
117 }
118
119 /*
120 Read Hangul base glyph file.
121 */
122 max_codept = hangul_read_base16 (params.infp, glyph);
123 if (max_codept > 0x8FF) {
124 fprintf (stderr, "\nWARNING: Hangul glyph range exceeds PUA space.\n\n");
125 }
126
127 codept = params.starting_codept; /* First code point to output */
128
129 for (cho = params.cho_start; cho <= params.cho_end; cho++) {
130 for (jung = params.jung_start; jung <= params.jung_end; jung++) {
131 for (jong = params.jong_start; jong <= params.jong_end; jong++) {
132
133#ifdef DEBUG
134 fprintf (params.outfp,
135 "(U+%04X, U+%04X, U+%04X)\n",
136 cho, jung, jong);
137#endif
138 combined_jamo (glyph, cho, jung, jong, tmp_glyph);
139 print_glyph_hex (params.outfp, codept, tmp_glyph);
140 codept++;
141 if (jong == JONG_UNICODE_END)
142 jong = JONG_EXTB_UNICODE_START - 1; /* Start Extended-B range */
143 }
144 if (jung == JUNG_UNICODE_END)
145 jung = JUNG_EXTB_UNICODE_START - 1; /* Start Extended-B range */
146 }
147 if (cho == CHO_UNICODE_END)
148 cho = CHO_EXTA_UNICODE_START - 1; /* Start Extended-A range */
149 }
150
151 if (params.infp != stdin) fclose (params.infp);
152 if (params.outfp != stdout) fclose (params.outfp);
153
154 exit (EXIT_SUCCESS);
155}
156
157
158/**
159 @brief Parse command line arguments.
160
161*/
162void
163parse_args (int argc, char *argv[], struct PARAMS *params) {
164 int arg_count; /* Current index into argv[]. */
165
166 void get_hex_range (char *instring, unsigned *start, unsigned *end);
167
168 int strncmp (const char *s1, const char *s2, size_t n);
169
170
171 arg_count = 1;
172
173 while (arg_count < argc) {
174 /* If all 600,000+ Hangul syllables are requested. */
175 if (strncmp (argv [arg_count], "-all", 4) == 0) {
176 params->starting_codept = 0x0001;
177 params->cho_start = CHO_UNICODE_START; /* First modern choseong */
178 params->cho_end = CHO_EXTA_UNICODE_END; /* Last ancient choseong */
179 params->jung_start = JUNG_UNICODE_START; /* First modern jungseong */
180 params->jung_end = JUNG_EXTB_UNICODE_END; /* Last ancient jungseong */
181 params->jong_start = JONG_UNICODE_START - 1; /* One before first modern jongseong */
182 params->jong_end = JONG_EXTB_UNICODE_END; /* Last andient jongseong */
183 }
184 /* If starting code point for output Unifont hex file is specified. */
185 else if (strncmp (argv [arg_count], "-c", 2) == 0) {
186 arg_count++;
187 if (arg_count < argc) {
188 sscanf (argv [arg_count], "%X", &params->starting_codept);
189 }
190 }
191 /* If initial consonant (choseong) range, "jamo 1", get range. */
192 else if (strncmp (argv [arg_count], "-j1", 3) == 0) {
193 arg_count++;
194 if (arg_count < argc) {
195 get_hex_range (argv [arg_count],
196 &params->cho_start, &params->cho_end);
197 /*
198 Allow one initial blank glyph at start of a loop, none at end.
199 */
200 if (params->cho_start < CHO_UNICODE_START) {
201 params->cho_start = CHO_UNICODE_START - 1;
202 }
203 else if (params->cho_start > CHO_UNICODE_END &&
204 params->cho_start < CHO_EXTA_UNICODE_START) {
205 params->cho_start = CHO_EXTA_UNICODE_START - 1;
206 }
207 /*
208 Do not go past desired Hangul choseong range,
209 Hangul Jamo or Hangul Jamo Extended-A choseong.
210 */
211 if (params->cho_end > CHO_EXTA_UNICODE_END) {
212 params->cho_end = CHO_EXTA_UNICODE_END;
213 }
214 else if (params->cho_end > CHO_UNICODE_END &&
215 params->cho_end < CHO_EXTA_UNICODE_START) {
216 params->cho_end = CHO_UNICODE_END;
217 }
218 }
219 }
220 /* If medial vowel (jungseong) range, "jamo 2", get range. */
221 else if (strncmp (argv [arg_count], "-j2", 3) == 0) {
222 arg_count++;
223 if (arg_count < argc) {
224 get_hex_range (argv [arg_count],
225 &params->jung_start, &params->jung_end);
226 /*
227 Allow one initial blank glyph at start of a loop, none at end.
228 */
229 if (params->jung_start < JUNG_UNICODE_START) {
230 params->jung_start = JUNG_UNICODE_START - 1;
231 }
232 else if (params->jung_start > JUNG_UNICODE_END &&
233 params->jung_start < JUNG_EXTB_UNICODE_START) {
234 params->jung_start = JUNG_EXTB_UNICODE_START - 1;
235 }
236 /*
237 Do not go past desired Hangul jungseong range,
238 Hangul Jamo or Hangul Jamo Extended-B jungseong.
239 */
240 if (params->jung_end > JUNG_EXTB_UNICODE_END) {
241 params->jung_end = JUNG_EXTB_UNICODE_END;
242 }
243 else if (params->jung_end > JUNG_UNICODE_END &&
244 params->jung_end < JUNG_EXTB_UNICODE_START) {
245 params->jung_end = JUNG_UNICODE_END;
246 }
247 }
248 }
249 /* If final consonant (jongseong) range, "jamo 3", get range. */
250 else if (strncmp (argv [arg_count], "-j3", 3) == 0) {
251 arg_count++;
252 if (arg_count < argc) {
253 get_hex_range (argv [arg_count],
254 &params->jong_start, &params->jong_end);
255 /*
256 Allow one initial blank glyph at start of a loop, none at end.
257 */
258 if (params->jong_start < JONG_UNICODE_START) {
259 params->jong_start = JONG_UNICODE_START - 1;
260 }
261 else if (params->jong_start > JONG_UNICODE_END &&
262 params->jong_start < JONG_EXTB_UNICODE_START) {
263 params->jong_start = JONG_EXTB_UNICODE_START - 1;
264 }
265 /*
266 Do not go past desired Hangul jongseong range,
267 Hangul Jamo or Hangul Jamo Extended-B jongseong.
268 */
269 if (params->jong_end > JONG_EXTB_UNICODE_END) {
270 params->jong_end = JONG_EXTB_UNICODE_END;
271 }
272 else if (params->jong_end > JONG_UNICODE_END &&
273 params->jong_end < JONG_EXTB_UNICODE_START) {
274 params->jong_end = JONG_UNICODE_END;
275 }
276 }
277 }
278 /* If input file is specified, open it for read access. */
279 else if (strncmp (argv [arg_count], "-i", 2) == 0) {
280 arg_count++;
281 if (arg_count < argc) {
282 params->infp = fopen (argv [arg_count], "r");
283 if (params->infp == NULL) {
284 fprintf (stderr, "\n*** ERROR: Cannot open %s for input.\n\n",
285 argv [arg_count]);
286 exit (EXIT_FAILURE);
287 }
288 }
289 }
290 /* If output file is specified, open it for write access. */
291 else if (strncmp (argv [arg_count], "-o", 2) == 0) {
292 arg_count++;
293 if (arg_count < argc) {
294 params->outfp = fopen (argv [arg_count], "w");
295 if (params->outfp == NULL) {
296 fprintf (stderr, "\n*** ERROR: Cannot open %s for output.\n\n",
297 argv [arg_count]);
298 exit (EXIT_FAILURE);
299 }
300 }
301 }
302 /* If help is requested, print help message and exit. */
303 else if (strncmp (argv [arg_count], "-h", 2) == 0 ||
304 strncmp (argv [arg_count], "--help", 6) == 0) {
305 printf ("\nunigen-hangul [options]\n\n");
306 printf (" Generates Hangul syllables from an input Unifont .hex file encoded\n");
307 printf (" in Johab 6/3/1 format. By default, the output is the Unicode Hangul\n");
308 printf (" Syllables range, U+AC00..U+D7A3. Options allow the user to specify\n");
309 printf (" a starting code point for the output Unifont .hex file, and ranges\n");
310 printf (" in hexadecimal of the starting and ending Hangul Jamo code points:\n\n");
311
312 printf (" * 1100-115E Initial consonants (choseong)\n");
313 printf (" * 1161-11A7 Medial vowels (jungseong)\n");
314 printf (" * 11A8-11FF Final consonants (jongseong).\n\n");
315
316 printf (" A single code point or 0 to omit can be specified instead of a range.\n\n");
317
318 printf (" Option Parameters Function\n");
319 printf (" ------ ---------- --------\n");
320 printf (" -h, --help Print this message and exit.\n\n");
321 printf (" -all Generate all Hangul syllables, using all modern and\n");
322 printf (" ancient Hangul in the Unicode range U+1100..U+11FF,\n");
323 printf (" U+A960..U+A97C, and U+D7B0..U+D7FB.\n");
324 printf (" WARNING: this will generate over 1,600,000 syllables\n");
325 printf (" in a 115 megabyte Unifont .hex format file. The\n");
326 printf (" default is to only output modern Hangul syllables.\n\n");
327 printf (" -c code_point Starting code point in hexadecimal for output file.\n\n");
328 printf (" -j1 start-end Choseong (jamo 1) start-end range in hexadecimal.\n\n");
329 printf (" -j2 start-end Jungseong (jamo 2) start-end range in hexadecimal.\n\n");
330 printf (" -j3 start-end Jongseong (jamo 3) start-end range in hexadecimal.\n\n");
331 printf (" -i input_file Unifont hangul-base.hex formatted input file.\n\n");
332 printf (" -o output_file Unifont .hex format output file.\n\n");
333 printf (" Example:\n\n");
334 printf (" unigen-hangul -c 1 -j3 11AB-11AB -i hangul-base.hex -o nieun-only.hex\n\n");
335 printf (" Generates Hangul syllables using all modern choseong and jungseong,\n");
336 printf (" and only the jongseong nieun (Unicode code point U+11AB). The output\n");
337 printf (" Unifont .hex file will contain code points starting at 1. Instead of\n");
338 printf (" specifying \"-j3 11AB-11AB\", simply using \"-j3 11AB\" will also suffice.\n\n");
339
340 exit (EXIT_SUCCESS);
341 }
342
343 arg_count++;
344 }
345
346 return;
347}
348
349
/**
   @brief Scan a hexadecimal range from a character string.

   The string holds either one hexadecimal number, or two hexadecimal
   numbers separated by a '-' character.  With one number, both ends
   of the range are set to it.  A number that cannot be scanned
   leaves its destination unchanged.

   @param[in]  instring The string to scan.
   @param[out] start    Receives the first number in the range.
   @param[out] end      Receives the last number in the range.
*/
void
get_hex_range (char *instring, unsigned *start, unsigned *end) {

   char *cursor = instring; /* Walks forward to the range separator. */

   /* The first number in the string is the start of the range. */
   sscanf (instring, "%X", start);

   /* Stop on the separator, or on the end of the string. */
   while (*cursor != '\0' && *cursor != '-') {
      cursor++;
   }

   /* No separator: a single number is a range of one code point. */
   if (*cursor != '-') {
      *end = *start;
      return;
   }

   /* The last number in the range follows the separator. */
   sscanf (cursor + 1, "%X", end);
}
Define constants and function prototypes for using Hangul glyphs.
void print_glyph_hex(FILE *fp, unsigned codept, unsigned *this_glyph)
Print one glyph in Unifont hexdraw hexadecimal string style.
void combined_jamo(unsigned glyph_table[MAX_GLYPHS][16], unsigned cho, unsigned jung, unsigned jong, unsigned *combined_glyph)
Convert Hangul Jamo choseong, jungseong, and jongseong into a glyph.
#define CHO_UNICODE_START
Modern Hangul choseong start.
Definition: hangul.h:50
#define JONG_UNICODE_END
Modern Hangul jongseong end.
Definition: hangul.h:61
#define JUNG_EXTB_UNICODE_START
Hangul Extended-B jungseong start.
Definition: hangul.h:57
#define JONG_EXTB_UNICODE_END
Hangul Extended-B jongseong end.
Definition: hangul.h:63
#define CHO_EXTA_UNICODE_START
Hangul Extended-A choseong start.
Definition: hangul.h:52
#define JONG_UNICODE_START
Modern Hangul jongseong start.
Definition: hangul.h:60
#define CHO_UNICODE_END
Hangul Jamo choseong end.
Definition: hangul.h:51
#define CHO_EXTA_UNICODE_END
Hangul Extended-A choseong end.
Definition: hangul.h:53
#define JUNG_UNICODE_START
Modern Hangul jungseong start.
Definition: hangul.h:55
#define JONG_EXTB_UNICODE_START
Hangul Extended-B jongseong start.
Definition: hangul.h:62
#define JUNG_UNICODE_END
Modern Hangul jungseong end.
Definition: hangul.h:56
unsigned hangul_read_base16(FILE *infp, unsigned base[][16])
Read hangul-base.hex file into an unsigned array.
#define JUNG_EXTB_UNICODE_END
Hangul Extended-B jungseong end.
Definition: hangul.h:58
#define MAX_GLYPHS
An OpenType font has at most 65536 glyphs.
Definition: hex2otf.c:85
int main(int argc, char *argv[])
Program entry point.
Definition: unigen-hangul.c:69
void get_hex_range(char *instring, unsigned *start, unsigned *end)
Scan a hexadecimal range from a character string.
void parse_args(int argc, char *argv[], struct PARAMS *params)
Parse command line arguments.