From 5c44f8354238d436734e12d3af44cc0c211d2fe2 Mon Sep 17 00:00:00 2001
From: Jaakko Keränen
Date: Sat, 25 Sep 2021 14:10:50 +0300
Subject: Canonical URL form decodes colons in paths

The handling of colons (a reserved character used in the URL scheme and
authority) was left ambiguous in the canonical form.
---
 src/gmutil.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/gmutil.c b/src/gmutil.c
index d87de8f6..1d361875 100644
--- a/src/gmutil.c
+++ b/src/gmutil.c
@@ -620,7 +620,20 @@ const iString *canonicalUrl_String(const iString *d) {
        - all non-reserved characters decoded (i.e., it's an IRI)
        - expect for spaces, which are always `%20`
        This means a canonical URL can be used on a gemtext link line without modifications. */
-    iString *canon = maybeUrlDecodeExclude_String(d, "%/?:;#&= ");
+    iString *canon = NULL;
+    iUrl parts;
+    init_Url(&parts, d);
+    /* Colons are in decoded form in the URL path. */
+    if (iStrStrN(parts.path.start, "%3A", size_Range(&parts.path)) ||
+        iStrStrN(parts.path.start, "%3a", size_Range(&parts.path))) {
+        /* This is done separately to avoid the copy if %3A is not present; it's rare. */
+        canon = copy_String(d);
+        urlDecodePath_String(canon);
+        urlDecodeExclude_String(d, "%/?:;#&= "); /* decode everything else in all parts */
+    }
+    else {
+        canon = maybeUrlDecodeExclude_String(d, "%/?:;#&= ");
+    }
     /* `canon` may now be NULL if nothing was decoded. */
     if (indexOfCStr_String(canon ? canon : d, " ") != iInvalidPos ||
         indexOfCStr_String(canon ? canon : d, "\n") != iInvalidPos) {
-- 
cgit v1.2.3