1 files changed, 42 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index f563d3738..87fa9e89a 100644
--- a/utf8.c
+++ b/utf8.c
@@ -27,6 +27,7 @@
 # include <langinfo.h>
 #endif
 #include <limits.h>
+#include <locale.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -288,3 +289,44 @@ mprintf(const char *fmt, ...)
        va_end(ap);
        return ret;
 }
+
+/*
+ * Set up libc for multibyte output in the user's chosen locale, i.e. the
+ * first of LC_ALL, LC_CTYPE and LANG that is set and non-empty.
+ *
+ * XXX: we are known to have problems with Turkish (i/I confusion) so we
+ *      deliberately fall back to the C locale for now. Longer term we should
+ *      always prefer C.[encoding] if possible, but locales aren't standardised
+ *      between systems, so we'll need to survey what's out there first.
+ */
+void
+msetlocale(void)
+{
+        const char *vars[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
+        char *cp;
+        int i;
+
+        /*
+         * We can't yet cope with dotless/dotted I in Turkish locales, so use
+         * C for these. Empty variables count as unset, as setlocale(3) does.
+         */
+        for (i = 0; vars[i] != NULL; i++) {
+                if ((cp = getenv(vars[i])) == NULL || *cp == '\0')
+                        continue;
+                if (strncasecmp(cp, "TR", 2) != 0)
+                        break;
+                /*
+                 * If we're in a UTF-8 locale then prefer to use
+                 * the C.UTF-8 locale (or equivalent) if it exists.
+                 */
+                if ((strcasestr(cp, "UTF-8") != NULL ||
+                    strcasestr(cp, "UTF8") != NULL) &&
+                    (setlocale(LC_CTYPE, "C.UTF-8") != NULL ||
+                    setlocale(LC_CTYPE, "POSIX.UTF-8") != NULL))
+                        return;
+                setlocale(LC_CTYPE, "C");
+                return;
+        }
+        /* We can handle this locale */
+        setlocale(LC_CTYPE, "");
+}

diff --git a/utf8.c b/utf8.c index f563d3738..87fa9e89a 100644 --- a/utf8.c +++ b/utf8.c
@@ -27,6 +27,7 @@
27	# include <langinfo.h>	27	# include <langinfo.h>
28	#endif	28	#endif
29	#include <limits.h>	29	#include <limits.h>
		30	#include <locale.h>
30	#include <stdarg.h>	31	#include <stdarg.h>
31	#include <stdio.h>	32	#include <stdio.h>
32	#include <stdlib.h>	33	#include <stdlib.h>
@@ -288,3 +289,44 @@ mprintf(const char *fmt, ...)
288	va_end(ap);	289	va_end(ap);
289	return ret;	290	return ret;
290	}	291	}
		292
		293	/*
		294	* Set up libc for multibyte output in the user's chosen locale.
		295	*
		296	* XXX: we are known to have problems with Turkish (i/I confusion) so we
		297	* deliberately fall back to the C locale for now. Longer term we should
		298	* always prefer to select C.[encoding] if possible, but there's no
		299	* standardisation in locales between systems, so we'll need to survey
		300	* what's out there first.
		301	*/
		302	void
		303	msetlocale(void)
		304	{
		305	const char *vars[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
		306	char *cp;
		307	int i;
		308
		309	/*
		310	* We can't yet cope with dotless/dotted I in Turkish locales,
		311	* so fall back to the C locale for these.
		312	*/
		313	for (i = 0; vars[i] != NULL; i++) {
		314	if ((cp = getenv(vars[i])) == NULL)
		315	continue;
		316	if (strncasecmp(cp, "TR", 2) != 0)
		317	break;
		318	/*
		319	* If we're in a UTF-8 locale then prefer to use
		320	* the C.UTF-8 locale (or equivalent) if it exists.
		321	*/
		322	if ((strcasestr(cp, "UTF-8") != NULL ||
		323	strcasestr(cp, "UTF8") != NULL) &&
		324	(setlocale(LC_CTYPE, "C.UTF-8") != NULL ||
		325	setlocale(LC_CTYPE, "POSIX.UTF-8") != NULL))
		326	return;
		327	setlocale(LC_CTYPE, "C");
		328	return;
		329	}
		330	/* We can handle this locale */
		331	setlocale(LC_CTYPE, "");
		332	}