summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaakko Keränen <jaakko.keranen@iki.fi>2021-12-27 10:00:17 +0200
committerJaakko Keränen <jaakko.keranen@iki.fi>2021-12-27 10:00:17 +0200
commit5e31f66313e2dc15fb9e75395504feb0992c3feb (patch)
treec43a734bd9ed2d1da671b8c885ef6af028c1b25f
parentcf0b3924268ffe03d8c30dfa553deba79802e41f (diff)
Reserved characters in URLs
Make URL encoding a little less convoluted. Now, when sending out a request, the URL is fully percent-encoded except for reserved characters. In the internal representation, non-ASCII characters are kept in decoded form (i.e., as an IRI). This means that if the user enters a URL in the input field manually, its non-ASCII characters will be percent-encoded as well. However, in this case the user is expected to manually escape all reserved characters, because the input field can't tell the difference between what is intended to be a reserved separator and what isn't. For example, a server might expect &-separated fields, and if the user enters such fields manually in the URL field, they shouldn't be converted to %26. When forming a query URL in the input dialog, user-entered text is fully percent-encoded, because in that case the input is just a generic text string. IssueID #410
-rw-r--r--src/app.c6
-rw-r--r--src/gmrequest.c6
-rw-r--r--src/gmutil.c30
-rw-r--r--src/gmutil.h2
-rw-r--r--src/ui/inputwidget.c10
5 files changed, 40 insertions, 14 deletions
diff --git a/src/app.c b/src/app.c
index 3b4c24f0..73cc35ee 100644
--- a/src/app.c
+++ b/src/app.c
@@ -2791,12 +2791,6 @@ iBool handleCommand_App(const char *cmd) {
2791 setRedirectCount_DocumentWidget(doc, redirectCount); 2791 setRedirectCount_DocumentWidget(doc, redirectCount);
2792 setOrigin_DocumentWidget(doc, origin); 2792 setOrigin_DocumentWidget(doc, origin);
2793 showCollapsed_Widget(findWidget_App("document.progress"), iFalse); 2793 showCollapsed_Widget(findWidget_App("document.progress"), iFalse);
2794 if (prefs_App()->decodeUserVisibleURLs) {
2795 urlDecodePath_String(url);
2796 }
2797 else {
2798 urlEncodePath_String(url);
2799 }
2800 setUrlFlags_DocumentWidget(doc, url, 2794 setUrlFlags_DocumentWidget(doc, url,
2801 isHistory ? useCachedContentIfAvailable_DocumentWidgetSetUrlFlag : 0); 2795 isHistory ? useCachedContentIfAvailable_DocumentWidgetSetUrlFlag : 0);
2802 /* Optionally, jump to a text in the document. This will only work if the document 2796 /* Optionally, jump to a text in the document. This will only work if the document
diff --git a/src/gmrequest.c b/src/gmrequest.c
index a9c5919d..c23e8499 100644
--- a/src/gmrequest.c
+++ b/src/gmrequest.c
@@ -585,8 +585,10 @@ void setUrl_GmRequest(iGmRequest *d, const iString *url) {
585 /* TODO: Gemini spec allows UTF-8 encoded URLs, but still need to percent-encode non-ASCII 585 /* TODO: Gemini spec allows UTF-8 encoded URLs, but still need to percent-encode non-ASCII
586 characters? Could be a server-side issue, e.g., if they're using a URL parser meant for 586 characters? Could be a server-side issue, e.g., if they're using a URL parser meant for
587 the web. */ 587 the web. */
588 urlEncodePath_String(&d->url); 588 /* Encode everything except already-percent encoded characters. */
589 urlEncodeSpaces_String(&d->url); 589 iString *enc = urlEncodeExclude_String(&d->url, "%" URL_RESERVED_CHARS);
590 set_String(&d->url, enc);
591 delete_String(enc);
590 d->identity = identityForUrl_GmCerts(d->certs, &d->url); 592 d->identity = identityForUrl_GmCerts(d->certs, &d->url);
591} 593}
592 594
diff --git a/src/gmutil.c b/src/gmutil.c
index 79462e41..98e4d4d6 100644
--- a/src/gmutil.c
+++ b/src/gmutil.c
@@ -330,6 +330,28 @@ void urlEncodePath_String(iString *d) {
330 delete_String(encoded); 330 delete_String(encoded);
331} 331}
332 332
333void urlEncodeQuery_String(iString *d) { /* Percent-encodes the query part of the URL in `d`, modifying `d` in place. */
334 iUrl url;
335 init_Url(&url, d); /* split `d` into URL components; only `url.query` is used here */
336 if (isEmpty_Range(&url.query)) {
337 return; /* no query part: `d` is left unchanged */
338 }
339 iString encoded;
340 init_String(&encoded);
341 appendRange_String(&encoded, (iRangecc){ constBegin_String(d), url.query.start }); /* copy everything that precedes the query */
342 iString query;
343 url.query.start++; /* omit the question mark */
344 initRange_String(&query, url.query);
345 iString *encQuery = urlEncode_String(&query); /* fully encoded */
346 appendCStr_String(&encoded, "?"); /* re-add the question mark that was skipped above */
347 append_String(&encoded, encQuery);
348 delete_String(encQuery);
349 deinit_String(&query);
350 appendRange_String(&encoded, (iRangecc){ url.query.end, constEnd_String(d) }); /* copy whatever follows the query (presumably the fragment -- confirm against init_Url) */
351 set_String(d, &encoded); /* replace the contents of `d` with the rebuilt URL */
352 deinit_String(&encoded);
353}
354
333iBool isKnownScheme_Rangecc(iRangecc scheme) { 355iBool isKnownScheme_Rangecc(iRangecc scheme) {
334 if (isKnownUrlScheme_Rangecc(scheme)) { 356 if (isKnownUrlScheme_Rangecc(scheme)) {
335 return iTrue; 357 return iTrue;
@@ -667,20 +689,20 @@ const iString *canonicalUrl_String(const iString *d) {
667 iString *canon = NULL; 689 iString *canon = NULL;
668 iUrl parts; 690 iUrl parts;
669 init_Url(&parts, d); 691 init_Url(&parts, d);
670 /* Colons are in decoded form in the URL path. */ 692 /* Colons (0x3a) are in decoded form in the URL path. */
671 if (iStrStrN(parts.path.start, "%3A", size_Range(&parts.path)) || 693 if (iStrStrN(parts.path.start, "%3A", size_Range(&parts.path)) ||
672 iStrStrN(parts.path.start, "%3a", size_Range(&parts.path))) { 694 iStrStrN(parts.path.start, "%3a", size_Range(&parts.path))) {
673 /* This is done separately to avoid the copy if %3A is not present; it's rare. */ 695 /* This is done separately to avoid the copy if %3A is not present; it's rare. */
674 canon = copy_String(d); 696 canon = copy_String(d);
675 urlDecodePath_String(canon); 697 urlDecodePath_String(canon);
676 iString *dec = maybeUrlDecodeExclude_String(canon, "%/?:;#&+= "); /* decode everything else in all parts */ 698 iString *dec = maybeUrlDecodeExclude_String(canon, "% " URL_RESERVED_CHARS); /* decode everything else in all parts */
677 if (dec) { 699 if (dec) {
678 set_String(canon, dec); 700 set_String(canon, dec);
679 delete_String(dec); 701 delete_String(dec);
680 } 702 }
681 } 703 }
682 else { 704 else {
683 canon = maybeUrlDecodeExclude_String(d, "%/?:;#&+= "); 705 canon = maybeUrlDecodeExclude_String(d, "% " URL_RESERVED_CHARS);
684 } 706 }
685 /* `canon` may now be NULL if nothing was decoded. */ 707 /* `canon` may now be NULL if nothing was decoded. */
686 if (indexOfCStr_String(canon ? canon : d, " ") != iInvalidPos || 708 if (indexOfCStr_String(canon ? canon : d, " ") != iInvalidPos ||
@@ -689,7 +711,7 @@ const iString *canonicalUrl_String(const iString *d) {
689 canon = copy_String(d); 711 canon = copy_String(d);
690 } 712 }
691 urlEncodeSpaces_String(canon); 713 urlEncodeSpaces_String(canon);
692 } 714 }
693 return canon ? collect_String(canon) : d; 715 return canon ? collect_String(canon) : d;
694} 716}
695 717
diff --git a/src/gmutil.h b/src/gmutil.h
index 6d337eeb..15bb7b2e 100644
--- a/src/gmutil.h
+++ b/src/gmutil.h
@@ -100,6 +100,7 @@ iRegExp * newGemtextLink_RegExp (void);
100 100
101#define GEMINI_DEFAULT_PORT ((uint16_t) 1965) 101#define GEMINI_DEFAULT_PORT ((uint16_t) 1965)
102#define GEMINI_DEFAULT_PORT_CSTR "1965" 102#define GEMINI_DEFAULT_PORT_CSTR "1965"
103#define URL_RESERVED_CHARS ":/?#[]@!$&'()*+,;=" /* RFC 3986 */
103 104
104struct Impl_Url { 105struct Impl_Url {
105 iRangecc scheme; 106 iRangecc scheme;
@@ -131,6 +132,7 @@ const iString * urlFragmentStripped_String(const iString *);
131const iString * urlQueryStripped_String (const iString *); 132const iString * urlQueryStripped_String (const iString *);
132void urlDecodePath_String (iString *); 133void urlDecodePath_String (iString *);
133void urlEncodePath_String (iString *); 134void urlEncodePath_String (iString *);
135void urlEncodeQuery_String (iString *);
134iString * makeFileUrl_String (const iString *localFilePath); 136iString * makeFileUrl_String (const iString *localFilePath);
135const char * makeFileUrl_CStr (const char *localFilePath); 137const char * makeFileUrl_CStr (const char *localFilePath);
136iString * localFilePathFromUrl_String(const iString *); 138iString * localFilePathFromUrl_String(const iString *);
diff --git a/src/ui/inputwidget.c b/src/ui/inputwidget.c
index b94e0c27..24983d69 100644
--- a/src/ui/inputwidget.c
+++ b/src/ui/inputwidget.c
@@ -1100,9 +1100,15 @@ static void updateBuffered_InputWidget_(iInputWidget *d) {
1100void setText_InputWidget(iInputWidget *d, const iString *text) { 1100void setText_InputWidget(iInputWidget *d, const iString *text) {
1101 if (!d) return; 1101 if (!d) return;
1102 if (d->inFlags & isUrl_InputWidgetFlag) { 1102 if (d->inFlags & isUrl_InputWidgetFlag) {
1103 /* If user wants URLs encoded, also Punycode the domain. */ 1103 if (prefs_App()->decodeUserVisibleURLs) {
1104 if (!prefs_App()->decodeUserVisibleURLs) {
1105 iString *enc = collect_String(copy_String(text)); 1104 iString *enc = collect_String(copy_String(text));
1105 urlDecodePath_String(enc);
1106 text = enc;
1107 }
1108 else {
1109 /* The user wants URLs encoded, also Punycode the domain. */
1110 iString *enc = collect_String(copy_String(text));
1111 urlEncodePath_String(enc);
1106 /* Prevent address bar spoofing (mentioned as IDN homograph attack in 1112 /* Prevent address bar spoofing (mentioned as IDN homograph attack in
1107 https://github.com/skyjake/lagrange/issues/73) */ 1113 https://github.com/skyjake/lagrange/issues/73) */
1108 punyEncodeUrlHost_String(enc); 1114 punyEncodeUrlHost_String(enc);