diff options
author | Jaakko Keränen <jaakko.keranen@iki.fi> | 2021-12-27 10:00:17 +0200 |
---|---|---|
committer | Jaakko Keränen <jaakko.keranen@iki.fi> | 2021-12-27 10:00:17 +0200 |
commit | 5e31f66313e2dc15fb9e75395504feb0992c3feb (patch) | |
tree | c43a734bd9ed2d1da671b8c885ef6af028c1b25f | |
parent | cf0b3924268ffe03d8c30dfa553deba79802e41f (diff) |
Reserved characters in URLs
Making URL encoding a little less convoluted. Now when sending out a request, the URL is fully encoded except for reserved characters. In the internal representation, non-ASCII characters are in decoded form (i.e., IRI).
This means that if the user enters a URL in the input field manually, its non-ASCII characters will be percent-encoded as well. However, in this case the user is expected to manually escape all reserved characters because the input field can't tell the difference between what is intended to be a reserved separator and what isn't. For example, a server might expect &-separated fields, and if the user enters such fields manually in the URL field, they shouldn't be converted to %26.
When forming a query URL in the input dialog, user-entered text is fully percent-encoded because in that case the input is just a generic text string.
IssueID #410
-rw-r--r-- | src/app.c | 6 | ||||
-rw-r--r-- | src/gmrequest.c | 6 | ||||
-rw-r--r-- | src/gmutil.c | 30 | ||||
-rw-r--r-- | src/gmutil.h | 2 | ||||
-rw-r--r-- | src/ui/inputwidget.c | 10 |
5 files changed, 40 insertions, 14 deletions
@@ -2791,12 +2791,6 @@ iBool handleCommand_App(const char *cmd) { | |||
2791 | setRedirectCount_DocumentWidget(doc, redirectCount); | 2791 | setRedirectCount_DocumentWidget(doc, redirectCount); |
2792 | setOrigin_DocumentWidget(doc, origin); | 2792 | setOrigin_DocumentWidget(doc, origin); |
2793 | showCollapsed_Widget(findWidget_App("document.progress"), iFalse); | 2793 | showCollapsed_Widget(findWidget_App("document.progress"), iFalse); |
2794 | if (prefs_App()->decodeUserVisibleURLs) { | ||
2795 | urlDecodePath_String(url); | ||
2796 | } | ||
2797 | else { | ||
2798 | urlEncodePath_String(url); | ||
2799 | } | ||
2800 | setUrlFlags_DocumentWidget(doc, url, | 2794 | setUrlFlags_DocumentWidget(doc, url, |
2801 | isHistory ? useCachedContentIfAvailable_DocumentWidgetSetUrlFlag : 0); | 2795 | isHistory ? useCachedContentIfAvailable_DocumentWidgetSetUrlFlag : 0); |
2802 | /* Optionally, jump to a text in the document. This will only work if the document | 2796 | /* Optionally, jump to a text in the document. This will only work if the document |
diff --git a/src/gmrequest.c b/src/gmrequest.c index a9c5919d..c23e8499 100644 --- a/src/gmrequest.c +++ b/src/gmrequest.c | |||
@@ -585,8 +585,10 @@ void setUrl_GmRequest(iGmRequest *d, const iString *url) { | |||
585 | /* TODO: Gemini spec allows UTF-8 encoded URLs, but still need to percent-encode non-ASCII | 585 | /* TODO: Gemini spec allows UTF-8 encoded URLs, but still need to percent-encode non-ASCII |
586 | characters? Could be a server-side issue, e.g., if they're using a URL parser meant for | 586 | characters? Could be a server-side issue, e.g., if they're using a URL parser meant for |
587 | the web. */ | 587 | the web. */ |
588 | urlEncodePath_String(&d->url); | 588 | /* Encode everything except already-percent encoded characters. */ |
589 | urlEncodeSpaces_String(&d->url); | 589 | iString *enc = urlEncodeExclude_String(&d->url, "%" URL_RESERVED_CHARS); |
590 | set_String(&d->url, enc); | ||
591 | delete_String(enc); | ||
590 | d->identity = identityForUrl_GmCerts(d->certs, &d->url); | 592 | d->identity = identityForUrl_GmCerts(d->certs, &d->url); |
591 | } | 593 | } |
592 | 594 | ||
diff --git a/src/gmutil.c b/src/gmutil.c index 79462e41..98e4d4d6 100644 --- a/src/gmutil.c +++ b/src/gmutil.c | |||
@@ -330,6 +330,28 @@ void urlEncodePath_String(iString *d) { | |||
330 | delete_String(encoded); | 330 | delete_String(encoded); |
331 | } | 331 | } |
332 | 332 | ||
333 | void urlEncodeQuery_String(iString *d) { | ||
334 | iUrl url; | ||
335 | init_Url(&url, d); | ||
336 | if (isEmpty_Range(&url.query)) { | ||
337 | return; | ||
338 | } | ||
339 | iString encoded; | ||
340 | init_String(&encoded); | ||
341 | appendRange_String(&encoded, (iRangecc){ constBegin_String(d), url.query.start }); | ||
342 | iString query; | ||
343 | url.query.start++; /* omit the question mark */ | ||
344 | initRange_String(&query, url.query); | ||
345 | iString *encQuery = urlEncode_String(&query); /* fully encoded */ | ||
346 | appendCStr_String(&encoded, "?"); | ||
347 | append_String(&encoded, encQuery); | ||
348 | delete_String(encQuery); | ||
349 | deinit_String(&query); | ||
350 | appendRange_String(&encoded, (iRangecc){ url.query.end, constEnd_String(d) }); | ||
351 | set_String(d, &encoded); | ||
352 | deinit_String(&encoded); | ||
353 | } | ||
354 | |||
333 | iBool isKnownScheme_Rangecc(iRangecc scheme) { | 355 | iBool isKnownScheme_Rangecc(iRangecc scheme) { |
334 | if (isKnownUrlScheme_Rangecc(scheme)) { | 356 | if (isKnownUrlScheme_Rangecc(scheme)) { |
335 | return iTrue; | 357 | return iTrue; |
@@ -667,20 +689,20 @@ const iString *canonicalUrl_String(const iString *d) { | |||
667 | iString *canon = NULL; | 689 | iString *canon = NULL; |
668 | iUrl parts; | 690 | iUrl parts; |
669 | init_Url(&parts, d); | 691 | init_Url(&parts, d); |
670 | /* Colons are in decoded form in the URL path. */ | 692 | /* Colons (0x3a) are in decoded form in the URL path. */ |
671 | if (iStrStrN(parts.path.start, "%3A", size_Range(&parts.path)) || | 693 | if (iStrStrN(parts.path.start, "%3A", size_Range(&parts.path)) || |
672 | iStrStrN(parts.path.start, "%3a", size_Range(&parts.path))) { | 694 | iStrStrN(parts.path.start, "%3a", size_Range(&parts.path))) { |
673 | /* This is done separately to avoid the copy if %3A is not present; it's rare. */ | 695 | /* This is done separately to avoid the copy if %3A is not present; it's rare. */ |
674 | canon = copy_String(d); | 696 | canon = copy_String(d); |
675 | urlDecodePath_String(canon); | 697 | urlDecodePath_String(canon); |
676 | iString *dec = maybeUrlDecodeExclude_String(canon, "%/?:;#&+= "); /* decode everything else in all parts */ | 698 | iString *dec = maybeUrlDecodeExclude_String(canon, "% " URL_RESERVED_CHARS); /* decode everything else in all parts */ |
677 | if (dec) { | 699 | if (dec) { |
678 | set_String(canon, dec); | 700 | set_String(canon, dec); |
679 | delete_String(dec); | 701 | delete_String(dec); |
680 | } | 702 | } |
681 | } | 703 | } |
682 | else { | 704 | else { |
683 | canon = maybeUrlDecodeExclude_String(d, "%/?:;#&+= "); | 705 | canon = maybeUrlDecodeExclude_String(d, "% " URL_RESERVED_CHARS); |
684 | } | 706 | } |
685 | /* `canon` may now be NULL if nothing was decoded. */ | 707 | /* `canon` may now be NULL if nothing was decoded. */ |
686 | if (indexOfCStr_String(canon ? canon : d, " ") != iInvalidPos || | 708 | if (indexOfCStr_String(canon ? canon : d, " ") != iInvalidPos || |
@@ -689,7 +711,7 @@ const iString *canonicalUrl_String(const iString *d) { | |||
689 | canon = copy_String(d); | 711 | canon = copy_String(d); |
690 | } | 712 | } |
691 | urlEncodeSpaces_String(canon); | 713 | urlEncodeSpaces_String(canon); |
692 | } | 714 | } |
693 | return canon ? collect_String(canon) : d; | 715 | return canon ? collect_String(canon) : d; |
694 | } | 716 | } |
695 | 717 | ||
diff --git a/src/gmutil.h b/src/gmutil.h index 6d337eeb..15bb7b2e 100644 --- a/src/gmutil.h +++ b/src/gmutil.h | |||
@@ -100,6 +100,7 @@ iRegExp * newGemtextLink_RegExp (void); | |||
100 | 100 | ||
101 | #define GEMINI_DEFAULT_PORT ((uint16_t) 1965) | 101 | #define GEMINI_DEFAULT_PORT ((uint16_t) 1965) |
102 | #define GEMINI_DEFAULT_PORT_CSTR "1965" | 102 | #define GEMINI_DEFAULT_PORT_CSTR "1965" |
103 | #define URL_RESERVED_CHARS ":/?#[]@!$&'()*+,;=" /* RFC 3986 */ | ||
103 | 104 | ||
104 | struct Impl_Url { | 105 | struct Impl_Url { |
105 | iRangecc scheme; | 106 | iRangecc scheme; |
@@ -131,6 +132,7 @@ const iString * urlFragmentStripped_String(const iString *); | |||
131 | const iString * urlQueryStripped_String (const iString *); | 132 | const iString * urlQueryStripped_String (const iString *); |
132 | void urlDecodePath_String (iString *); | 133 | void urlDecodePath_String (iString *); |
133 | void urlEncodePath_String (iString *); | 134 | void urlEncodePath_String (iString *); |
135 | void urlEncodeQuery_String (iString *); | ||
134 | iString * makeFileUrl_String (const iString *localFilePath); | 136 | iString * makeFileUrl_String (const iString *localFilePath); |
135 | const char * makeFileUrl_CStr (const char *localFilePath); | 137 | const char * makeFileUrl_CStr (const char *localFilePath); |
136 | iString * localFilePathFromUrl_String(const iString *); | 138 | iString * localFilePathFromUrl_String(const iString *); |
diff --git a/src/ui/inputwidget.c b/src/ui/inputwidget.c index b94e0c27..24983d69 100644 --- a/src/ui/inputwidget.c +++ b/src/ui/inputwidget.c | |||
@@ -1100,9 +1100,15 @@ static void updateBuffered_InputWidget_(iInputWidget *d) { | |||
1100 | void setText_InputWidget(iInputWidget *d, const iString *text) { | 1100 | void setText_InputWidget(iInputWidget *d, const iString *text) { |
1101 | if (!d) return; | 1101 | if (!d) return; |
1102 | if (d->inFlags & isUrl_InputWidgetFlag) { | 1102 | if (d->inFlags & isUrl_InputWidgetFlag) { |
1103 | /* If user wants URLs encoded, also Punycode the domain. */ | 1103 | if (prefs_App()->decodeUserVisibleURLs) { |
1104 | if (!prefs_App()->decodeUserVisibleURLs) { | ||
1105 | iString *enc = collect_String(copy_String(text)); | 1104 | iString *enc = collect_String(copy_String(text)); |
1105 | urlDecodePath_String(enc); | ||
1106 | text = enc; | ||
1107 | } | ||
1108 | else { | ||
1109 | /* The user wants URLs encoded, also Punycode the domain. */ | ||
1110 | iString *enc = collect_String(copy_String(text)); | ||
1111 | urlEncodePath_String(enc); | ||
1106 | /* Prevent address bar spoofing (mentioned as IDN homograph attack in | 1112 | /* Prevent address bar spoofing (mentioned as IDN homograph attack in |
1107 | https://github.com/skyjake/lagrange/issues/73) */ | 1113 | https://github.com/skyjake/lagrange/issues/73) */ |
1108 | punyEncodeUrlHost_String(enc); | 1114 | punyEncodeUrlHost_String(enc); |