summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c42
1 files changed, 42 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index f563d3738..87fa9e89a 100644
--- a/utf8.c
+++ b/utf8.c
@@ -27,6 +27,7 @@
27# include <langinfo.h> 27# include <langinfo.h>
28#endif 28#endif
29#include <limits.h> 29#include <limits.h>
30#include <locale.h>
30#include <stdarg.h> 31#include <stdarg.h>
31#include <stdio.h> 32#include <stdio.h>
32#include <stdlib.h> 33#include <stdlib.h>
@@ -288,3 +289,44 @@ mprintf(const char *fmt, ...)
288 va_end(ap); 289 va_end(ap);
289 return ret; 290 return ret;
290} 291}
292
/*
 * Set up libc for multibyte output in the user's chosen locale.
 *
 * Only the LC_CTYPE category is changed.  The locale name the user asked
 * for is taken from the first of LC_ALL, LC_CTYPE and LANG that is set to
 * a non-empty value; POSIX specifies that an empty value is ignored as if
 * the variable were unset, so empty values are skipped here as well.
 *
 * XXX: we are known to have problems with Turkish (i/I confusion) so we
 * deliberately fall back to the C locale for now. Longer term we should
 * always prefer to select C.[encoding] if possible, but there's no
 * standardisation in locales between systems, so we'll need to survey
 * what's out there first.
 */
void
msetlocale(void)
{
	/* Listed in the order of precedence that setlocale(3) applies. */
	static const char *const vars[] = {
		"LC_ALL", "LC_CTYPE", "LANG", NULL
	};
	const char *cp;
	int i;

	/*
	 * We can't yet cope with dotless/dotted I in Turkish locales,
	 * so fall back to the C locale for these.
	 */
	for (i = 0; vars[i] != NULL; i++) {
		/*
		 * An empty value does not select a locale; keep looking at
		 * the lower-precedence variables so that e.g. LC_ALL=""
		 * cannot hide LANG=tr_TR.UTF-8 from the check below.
		 */
		if ((cp = getenv(vars[i])) == NULL || *cp == '\0')
			continue;
		/* The first non-empty variable decides; stop if not Turkish. */
		if (strncasecmp(cp, "TR", 2) != 0)
			break;
		/*
		 * If we're in a UTF-8 locale then prefer to use
		 * the C.UTF-8 locale (or equivalent) if it exists.
		 */
		if ((strcasestr(cp, "UTF-8") != NULL ||
		    strcasestr(cp, "UTF8") != NULL) &&
		    (setlocale(LC_CTYPE, "C.UTF-8") != NULL ||
		    setlocale(LC_CTYPE, "POSIX.UTF-8") != NULL))
			return;
		setlocale(LC_CTYPE, "C");
		return;
	}
	/* We can handle this locale */
	setlocale(LC_CTYPE, "");
}