From 7f8f6f3305161706d44884cccd4961ba4f0490bf Mon Sep 17 00:00:00 2001
From: Jaakko Keränen
Date: Fri, 11 Dec 2020 20:40:32 +0200
Subject: GmRequest: Punycode for domain names

To support Internationalized Domain Names, we need to encode domain names using Punycode.

IssueID #73
---
 src/gmutil.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'src/gmutil.c')

diff --git a/src/gmutil.c b/src/gmutil.c
index 477d0f17..67b0d939 100644
--- a/src/gmutil.c
+++ b/src/gmutil.c
@@ -185,10 +185,51 @@ const iString *absoluteUrl_String(const iString *d, const iString *urlMaybeRelat
         appendRange_String(absolute, orig.path);
     }
     appendRange_String(absolute, rel.query);
+    normalize_String(absolute);
     cleanUrlPath_String(absolute);
     return absolute;
 }
 
+static iBool equalPuny_(const iString *d, iRangecc orig) { /* iTrue only when `d` is exactly `orig` followed by one "-". */
+    if (!endsWith_String(d, "-")) {
+        return iFalse; /* No trailing hyphen, so `d` cannot be `orig` + "-". Original author's open question: is this a sufficient condition? */
+    }
+    if (size_String(d) != size_Range(&orig) + 1) { /* Length must be that of `orig` plus the one trailing hyphen. */
+        return iFalse;
+    }
+    return iCmpStrN(cstr_String(d), orig.start, size_Range(&orig)) == 0; /* Compares only the first size_Range(&orig) bytes; the final "-" was checked above. */
+}
+
+void punyEncodeUrlHost_String(iString *d) { /* Rewrites `d` in place, replacing each "."-separated host segment with its "xn--" form where needed. */
+    /* `d` should be an absolute URL. NOTE(review): url.host is used below without checking that a host was found -- confirm callers guarantee one. */
+    iUrl url;
+    init_Url(&url, d);
+    iString *encoded = new_String(); /* Scratch string for the rebuilt URL; deleted before returning. */
+    setRange_String(encoded, (iRangecc){ url.scheme.start, url.host.start }); /* Copy everything that precedes the host unchanged. */
+    /* The domain name needs to be split into segments; each one is encoded on its own. */ {
+        iRangecc seg = iNullRange;
+        iBool isFirst = iTrue;
+        while (nextSplit_Rangecc(url.host, ".", &seg)) {
+            if (!isFirst) { /* Put the "." separator back between segments. */
+                appendChar_String(encoded, '.');
+            }
+            isFirst = iFalse;
+            iString *puny = punyEncode_Rangecc(seg); /* Deleted at the end of this iteration. */
+            if (!isEmpty_String(puny) && !equalPuny_(puny, seg)) { /* Use the encoded form unless it is empty or is just `seg` + "-". */
+                appendCStr_String(encoded, "xn--");
+                append_String(encoded, puny);
+            }
+            else { /* Otherwise keep the segment exactly as written. */
+                appendRange_String(encoded, seg);
+            }
+            delete_String(puny);
+        }
+    }
+    appendRange_String(encoded, (iRangecc){ url.host.end, constEnd_String(d) }); /* Copy the rest of the URL after the host unchanged. */
+    set_String(d, encoded); /* Replace the contents of `d` with the rebuilt URL. */
+    delete_String(encoded);
+}
+
 iString *makeFileUrl_String(const iString *localFilePath) {
     iString *url = cleaned_Path(localFilePath);
     replace_Block(&url->chars, '\\', '/'); /* in case it's a Windows path */
-- 
cgit v1.2.3