summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Jaakko Keränen <jaakko.keranen@iki.fi> 2020-08-01 13:17:49 +0300
committer Jaakko Keränen <jaakko.keranen@iki.fi> 2020-08-01 13:17:49 +0300
commit 105b1948a8065659e17407189962ca7a4119e733 (patch)
tree 3d5a779ad243537fd7d3b05d6c70008c30bb3435 /src
parent 3b76ce224e40f670c0988985d9a9405edb7ed24d (diff)
More robust URL processing
Fixed a number of special cases revealed by the client torture test.
Diffstat (limited to 'src')
-rw-r--r-- src/gmdocument.c 32
-rw-r--r-- src/gmdocument.h 19
-rw-r--r-- src/gmutil.c 138
-rw-r--r-- src/ui/documentwidget.c 19
4 files changed, 137 insertions, 71 deletions
diff --git a/src/gmdocument.c b/src/gmdocument.c
index 0b792d56..116e532a 100644
--- a/src/gmdocument.c
+++ b/src/gmdocument.c
@@ -171,18 +171,17 @@ static iRangecc addLink_GmDocument_(iGmDocument *d, iRangecc line, iGmLinkId *li
171 if (matchRange_RegExp(pattern, line, &m)) { 171 if (matchRange_RegExp(pattern, line, &m)) {
172 iGmLink *link = new_GmLink(); 172 iGmLink *link = new_GmLink();
173 setRange_String(&link->url, capturedRange_RegExpMatch(&m, 1)); 173 setRange_String(&link->url, capturedRange_RegExpMatch(&m, 1));
174 set_String(&link->url, absoluteUrl_String(&d->url, &link->url));
174 /* Check the URL. */ { 175 /* Check the URL. */ {
175 iUrl parts; 176 iUrl parts;
176 init_Url(&parts, &link->url); 177 init_Url(&parts, &link->url);
177 /* Host name. */ 178 if (!equalCase_Rangecc(&parts.host, cstr_String(&d->localHost))) {
178 if (!isEmpty_Range(&parts.host) &&
179 !equalCase_Rangecc(&parts.host, cstr_String(&d->localHost))) {
180 link->flags |= remote_GmLinkFlag; 179 link->flags |= remote_GmLinkFlag;
181 } 180 }
182 if (!isEmpty_Range(&parts.protocol) && !equalCase_Rangecc(&parts.protocol, "gemini")) { 181 if (startsWithCase_Rangecc(&parts.protocol, "gemini")) {
183 link->flags |= remote_GmLinkFlag; 182 link->flags |= gemini_GmLinkFlag;
184 } 183 }
185 if (startsWithCase_Rangecc(&parts.protocol, "http")) { 184 else if (startsWithCase_Rangecc(&parts.protocol, "http")) {
186 link->flags |= http_GmLinkFlag; 185 link->flags |= http_GmLinkFlag;
187 } 186 }
188 else if (equalCase_Rangecc(&parts.protocol, "gopher")) { 187 else if (equalCase_Rangecc(&parts.protocol, "gopher")) {
@@ -191,6 +190,9 @@ static iRangecc addLink_GmDocument_(iGmDocument *d, iRangecc line, iGmLinkId *li
191 else if (equalCase_Rangecc(&parts.protocol, "file")) { 190 else if (equalCase_Rangecc(&parts.protocol, "file")) {
192 link->flags |= file_GmLinkFlag; 191 link->flags |= file_GmLinkFlag;
193 } 192 }
193 else if (equalCase_Rangecc(&parts.protocol, "data")) {
194 link->flags |= data_GmLinkFlag;
195 }
194 /* Check the file name extension, if present. */ 196 /* Check the file name extension, if present. */
195 if (!isEmpty_Range(&parts.path)) { 197 if (!isEmpty_Range(&parts.path)) {
196 iString *path = newRange_String(parts.path); 198 iString *path = newRange_String(parts.path);
@@ -206,11 +208,9 @@ static iRangecc addLink_GmDocument_(iGmDocument *d, iRangecc line, iGmLinkId *li
206 } 208 }
207 delete_String(path); 209 delete_String(path);
208 } 210 }
209 } 211 /* Check if visited. */
210 /* Check if visited. */ { 212 if (cmpString_String(&link->url, &d->url)) {
211 const iString *absUrl = absoluteUrl_String(&d->url, &link->url); 213 link->when = urlVisitTime_History(history_App(), &link->url);
212 if (cmpString_String(absUrl, &d->url)) {
213 link->when = urlVisitTime_History(history_App(), absUrl);
214 if (isValid_Time(&link->when)) { 214 if (isValid_Time(&link->when)) {
215 link->flags |= visited_GmLinkFlag; 215 link->flags |= visited_GmLinkFlag;
216 } 216 }
@@ -401,8 +401,10 @@ static void doLayout_GmDocument_(iGmDocument *d) {
401 if (link->flags & remote_GmLinkFlag) { 401 if (link->flags & remote_GmLinkFlag) {
402 run.visBounds.pos.x -= gap_UI / 2; 402 run.visBounds.pos.x -= gap_UI / 2;
403 } 403 }
404
405 run.color = linkColor_GmDocument(d, run.linkId); 404 run.color = linkColor_GmDocument(d, run.linkId);
405 if (link->flags & visited_GmLinkFlag) {
406 run.color--; /* darker */
407 }
406 pushBack_Array(&d->layout, &run); 408 pushBack_Array(&d->layout, &run);
407 } 409 }
408 run.color = colors[type]; 410 run.color = colors[type];
@@ -554,7 +556,7 @@ static void normalize_GmDocument(iGmDocument *d) {
554 appendCStrN_String(normalized, " ", 1); 556 appendCStrN_String(normalized, " ", 1);
555 } 557 }
556 } 558 }
557 else { 559 else if (*ch != '\r') {
558 appendCStrN_String(normalized, ch, 1); 560 appendCStrN_String(normalized, ch, 1);
559 } 561 }
560 } 562 }
@@ -573,6 +575,7 @@ static void normalize_GmDocument(iGmDocument *d) {
573 iBool isPrevSpace = iFalse; 575 iBool isPrevSpace = iFalse;
574 for (const char *ch = line.start; ch != line.end; ch++) { 576 for (const char *ch = line.start; ch != line.end; ch++) {
575 char c = *ch; 577 char c = *ch;
578 if (c == '\r') continue;
576 if (isNormalizableSpace_(c)) { 579 if (isNormalizableSpace_(c)) {
577 if (isPrevSpace) { 580 if (isPrevSpace) {
578 continue; /* skip repeated spaces */ 581 continue; /* skip repeated spaces */
@@ -738,6 +741,9 @@ uint16_t linkImage_GmDocument(const iGmDocument *d, iGmLinkId linkId) {
738enum iColorId linkColor_GmDocument(const iGmDocument *d, iGmLinkId linkId) { 741enum iColorId linkColor_GmDocument(const iGmDocument *d, iGmLinkId linkId) {
739 const iGmLink *link = link_GmDocument_(d, linkId); 742 const iGmLink *link = link_GmDocument_(d, linkId);
740 if (link) { 743 if (link) {
744 if ((link->flags & supportedProtocol_GmLinkFlag) == 0) {
745 return red_ColorId;
746 }
741 return link->flags & http_GmLinkFlag 747 return link->flags & http_GmLinkFlag
742 ? orange_ColorId 748 ? orange_ColorId
743 : link->flags & gopher_GmLinkFlag ? blue_ColorId : cyan_ColorId; 749 : link->flags & gopher_GmLinkFlag ? blue_ColorId : cyan_ColorId;
diff --git a/src/gmdocument.h b/src/gmdocument.h
index 92b9e1cb..db5a5451 100644
--- a/src/gmdocument.h
+++ b/src/gmdocument.h
@@ -13,15 +13,18 @@ iDeclareType(GmRun)
13typedef uint16_t iGmLinkId; 13typedef uint16_t iGmLinkId;
14 14
15enum iGmLinkFlags { 15enum iGmLinkFlags {
16 userFriendly_GmLinkFlag = iBit(1), 16 gemini_GmLinkFlag = iBit(1),
17 remote_GmLinkFlag = iBit(2), 17 gopher_GmLinkFlag = iBit(2),
18 http_GmLinkFlag = iBit(3), 18 http_GmLinkFlag = iBit(3),
19 gopher_GmLinkFlag = iBit(4), 19 file_GmLinkFlag = iBit(4),
20 file_GmLinkFlag = iBit(5), 20 data_GmLinkFlag = iBit(5),
21 imageFileExtension_GmLinkFlag = iBit(6), 21 supportedProtocol_GmLinkFlag = 0x1f,
22 audioFileExtension_GmLinkFlag = iBit(7), 22 remote_GmLinkFlag = iBit(9),
23 content_GmLinkFlag = iBit(8), /* content visible below */ 23 userFriendly_GmLinkFlag = iBit(10),
24 visited_GmLinkFlag = iBit(9), /* in the history */ 24 imageFileExtension_GmLinkFlag = iBit(11),
25 audioFileExtension_GmLinkFlag = iBit(12),
26 content_GmLinkFlag = iBit(13), /* content visible below */
27 visited_GmLinkFlag = iBit(14), /* in the history */
25}; 28};
26 29
27iDeclareType(GmImageInfo) 30iDeclareType(GmImageInfo)
diff --git a/src/gmutil.c b/src/gmutil.c
index af090574..2008ea36 100644
--- a/src/gmutil.c
+++ b/src/gmutil.c
@@ -6,30 +6,36 @@
6 6
7void init_Url(iUrl *d, const iString *text) { 7void init_Url(iUrl *d, const iString *text) {
8 iRegExp *absPat = 8 iRegExp *absPat =
9 new_RegExp("(.+)://([^/:?]*)(:[0-9]+)?([^?]*)(\\?.*)?", caseInsensitive_RegExpOption); 9 new_RegExp("([a-z]+:)?(//[^/:?]*)(:[0-9]+)?([^?]*)(\\?.*)?", caseInsensitive_RegExpOption);
10 iRegExpMatch m; 10 iRegExpMatch m;
11 if (matchString_RegExp(absPat, text, &m)) { 11 if (matchString_RegExp(absPat, text, &m)) {
12 d->protocol = capturedRange_RegExpMatch(&m, 1); 12 d->protocol = capturedRange_RegExpMatch(&m, 1);
13 d->host = capturedRange_RegExpMatch(&m, 2); 13 d->host = capturedRange_RegExpMatch(&m, 2);
14 d->port = capturedRange_RegExpMatch(&m, 3); 14 if (!isEmpty_Range(&d->host)) {
15 d->host.start += 2; /* skip the double slash */
16 }
17 d->port = capturedRange_RegExpMatch(&m, 3);
15 if (!isEmpty_Range(&d->port)) { 18 if (!isEmpty_Range(&d->port)) {
16 /* Don't include the colon. */ 19 d->port.start++; /* omit the colon */
17 d->port.start++;
18 } 20 }
19 d->path = capturedRange_RegExpMatch(&m, 4); 21 d->path = capturedRange_RegExpMatch(&m, 4);
20 d->query = capturedRange_RegExpMatch(&m, 5); 22 d->query = capturedRange_RegExpMatch(&m, 5);
21 } 23 }
22 else { 24 else {
23 /* Must be a relative path. */ 25 /* Must be a relative path. */
24 iZap(*d); 26 iZap(*d);
25 iRegExp *relPat = new_RegExp("([^?]*)(\\?.*)?", 0); 27 iRegExp *relPat = new_RegExp("([a-z]+:)?([^?]*)(\\?.*)?", 0);
26 if (matchString_RegExp(relPat, text, &m)) { 28 if (matchString_RegExp(relPat, text, &m)) {
27 d->path = capturedRange_RegExpMatch(&m, 1); 29 d->protocol = capturedRange_RegExpMatch(&m, 1);
28 d->query = capturedRange_RegExpMatch(&m, 2); 30 d->path = capturedRange_RegExpMatch(&m, 2);
31 d->query = capturedRange_RegExpMatch(&m, 3);
29 } 32 }
30 iRelease(relPat); 33 iRelease(relPat);
31 } 34 }
32 iRelease(absPat); 35 iRelease(absPat);
36 if (!isEmpty_Range(&d->protocol)) {
37 d->protocol.end--; /* omit the colon */
38 }
33} 39}
34 40
35static iRangecc dirPath_(iRangecc path) { 41static iRangecc dirPath_(iRangecc path) {
@@ -38,51 +44,97 @@ static iRangecc dirPath_(iRangecc path) {
38 return (iRangecc){ path.start, path.start + pos }; 44 return (iRangecc){ path.start, path.start + pos };
39} 45}
40 46
47iLocalDef iBool isDef_(iRangecc cc) {
48 return !isEmpty_Range(&cc);
49}
50
51static iRangecc prevPathSeg_(const char *end, const char *start) {
52 iRangecc seg = { end, end };
53 do {
54 seg.start--;
55 } while (*seg.start != '/' && seg.start != start);
56 return seg;
57}
58
59void cleanUrlPath_String(iString *d) {
60 iString clean;
61 init_String(&clean);
62 iUrl parts;
63 init_Url(&parts, d);
64 iRangecc seg = iNullRange;
65 while (nextSplit_Rangecc(&parts.path, "/", &seg)) {
66 if (equal_Rangecc(&seg, "..")) {
67 /* Back up one segment. */
68 iRangecc last = prevPathSeg_(constEnd_String(&clean), constBegin_String(&clean));
69 truncate_Block(&clean.chars, last.start - constBegin_String(&clean));
70 }
71 else if (equal_Rangecc(&seg, ".")) {
72 /* Skip it. */
73 }
74 else {
75 appendCStr_String(&clean, "/");
76 appendRange_String(&clean, seg);
77 }
78 }
79 if (endsWith_Rangecc(&parts.path, "/")) {
80 appendCStr_String(&clean, "/");
81 }
82 /* Replace with the new path. */
83 if (cmpCStrNSc_Rangecc(&parts.path, cstr_String(&clean), size_String(&clean), &iCaseSensitive)) {
84 const size_t pos = parts.path.start - constBegin_String(d);
85 remove_Block(&d->chars, pos, size_Range(&parts.path));
86 insertData_Block(&d->chars, pos, cstr_String(&clean), size_String(&clean));
87 }
88 deinit_String(&clean);
89}
90
41const iString *absoluteUrl_String(const iString *d, const iString *urlMaybeRelative) { 91const iString *absoluteUrl_String(const iString *d, const iString *urlMaybeRelative) {
42 if (indexOfCStr_String(urlMaybeRelative, "://") != iInvalidPos) { 92 iUrl orig;
43 /* Already absolute. */ 93 iUrl rel;
94 init_Url(&orig, d);
95 init_Url(&rel, urlMaybeRelative);
96 if (equalCase_Rangecc(&rel.protocol, "data")) {
97 /* Special case, the contents should be left unparsed. */
44 return urlMaybeRelative; 98 return urlMaybeRelative;
45 } 99 }
46 iUrl parts; 100 const iBool isRelative = !isDef_(rel.host);
47 init_Url(&parts, d); 101 iRangecc protocol = range_CStr("gemini");
48 iString *absolute = new_String(); 102 if (isDef_(rel.protocol)) {
49 appendRange_String(absolute, parts.protocol); 103 protocol = rel.protocol;
50 appendCStr_String(absolute, "://"); 104 }
51 appendRange_String(absolute, parts.host); 105 else if (isRelative && isDef_(orig.protocol)) {
52 if (!isEmpty_Range(&parts.port)) { 106 protocol = orig.protocol;
53 appendCStr_String(absolute, ":");
54 appendRange_String(absolute, parts.port);
55 } 107 }
56 if (startsWith_String(urlMaybeRelative, "/")) { 108 iString *absolute = collectNew_String();
57 append_String(absolute, urlMaybeRelative); 109 appendRange_String(absolute, protocol);
110 appendCStr_String(absolute, "://"); {
111 const iUrl *selHost = isDef_(rel.host) ? &rel : &orig;
112 appendRange_String(absolute, selHost->host);
113 if (!isEmpty_Range(&selHost->port)) {
114 appendCStr_String(absolute, ":");
115 appendRange_String(absolute, selHost->port);
116 }
117 }
118 if (isDef_(rel.protocol) || isDef_(rel.host) || startsWith_Rangecc(&rel.path, "/")) {
119 appendRange_String(absolute, rel.path); /* absolute path */
58 } 120 }
59 else { 121 else {
60 iRangecc relPath = range_String(urlMaybeRelative); 122 if (!endsWith_Rangecc(&orig.path, "/")) {
61 iRangecc dir = dirPath_(parts.path); 123 /* Referencing a file. */
62 for (;;) { 124 appendRange_String(absolute, dirPath_(orig.path));
63 if (equal_Rangecc(&relPath, ".")) { 125 }
64 relPath.start++; 126 else {
65 } 127 /* Referencing a directory. */
66 else if (startsWith_Rangecc(&relPath, "./")) { 128 appendRange_String(absolute, orig.path);
67 relPath.start += 2;
68 }
69 else if (equal_Rangecc(&relPath, "..")) {
70 relPath.start += 2;
71 dir = dirPath_(dir);
72 }
73 else if (startsWith_Rangecc(&relPath, "../")) {
74 relPath.start += 3;
75 dir = dirPath_(dir);
76 }
77 else break;
78 } 129 }
79 appendRange_String(absolute, dir);
80 if (!endsWith_String(absolute, "/")) { 130 if (!endsWith_String(absolute, "/")) {
81 appendCStr_String(absolute, "/"); 131 appendCStr_String(absolute, "/");
82 } 132 }
83 appendRange_String(absolute, relPath); 133 appendRange_String(absolute, rel.path);
84 } 134 }
85 return collect_String(absolute); 135 appendRange_String(absolute, rel.query);
136 cleanUrlPath_String(absolute);
137 return absolute;
86} 138}
87 139
88void urlEncodeSpaces_String(iString *d) { 140void urlEncodeSpaces_String(iString *d) {
diff --git a/src/ui/documentwidget.c b/src/ui/documentwidget.c
index 200bda95..3b8c468b 100644
--- a/src/ui/documentwidget.c
+++ b/src/ui/documentwidget.c
@@ -144,14 +144,20 @@ void deinit_DocumentWidget(iDocumentWidget *d) {
144 SDL_FreeCursor(d->handCursor); 144 SDL_FreeCursor(d->handCursor);
145} 145}
146 146
147#if 0
147static iString *cleanUrl_(const iString *url) { 148static iString *cleanUrl_(const iString *url) {
148 iString *clean = copy_String(url); 149 iString *clean = copy_String(url);
149 if (indexOfCStr_String(url, "://") == iInvalidPos && !startsWithCase_String(url, "gemini:")) { 150 if (startsWith_String(url, "//")) {
151 prependCStr_String(clean, "gemini:");
152 }
153 else if (indexOfCStr_String(url, "://") == iInvalidPos && !startsWithCase_String(url, "gemini:")
154 && !startsWithCase_String(url, "data:")) {
150 /* Prepend default protocol. */ 155 /* Prepend default protocol. */
151 prependCStr_String(clean, "gemini://"); 156 prependCStr_String(clean, "gemini://");
152 } 157 }
153 return clean; 158 return clean;
154} 159}
160#endif
155 161
156static int documentWidth_DocumentWidget_(const iDocumentWidget *d) { 162static int documentWidth_DocumentWidget_(const iDocumentWidget *d) {
157 const iWidget *w = constAs_Widget(d); 163 const iWidget *w = constAs_Widget(d);
@@ -201,7 +207,7 @@ static iRangei visibleRange_DocumentWidget_(const iDocumentWidget *d) {
201 207
202static void addVisibleLink_DocumentWidget_(void *context, const iGmRun *run) { 208static void addVisibleLink_DocumentWidget_(void *context, const iGmRun *run) {
203 iDocumentWidget *d = context; 209 iDocumentWidget *d = context;
204 if (run->linkId) { 210 if (run->linkId && linkFlags_GmDocument(d->doc, run->linkId) & supportedProtocol_GmLinkFlag) {
205 pushBack_PtrArray(&d->visibleLinks, run); 211 pushBack_PtrArray(&d->visibleLinks, run);
206 } 212 }
207} 213}
@@ -363,9 +369,8 @@ static void fetch_DocumentWidget_(iDocumentWidget *d) {
363} 369}
364 370
365void setUrl_DocumentWidget(iDocumentWidget *d, const iString *url) { 371void setUrl_DocumentWidget(iDocumentWidget *d, const iString *url) {
366 iString *newUrl = collect_String(cleanUrl_(url)); 372 if (cmpStringSc_String(d->url, url, &iCaseInsensitive)) {
367 if (cmpStringSc_String(d->url, newUrl, &iCaseInsensitive)) { 373 set_String(d->url, url);
368 set_String(d->url, newUrl);
369 fetch_DocumentWidget_(d); 374 fetch_DocumentWidget_(d);
370 } 375 }
371 /* See if there a username in the URL. */ { 376 /* See if there a username in the URL. */ {
@@ -861,7 +866,7 @@ static void drawRun_DrawContext_(void *context, const iGmRun *run) {
861 fillRange_DrawContext_(d, run, teal_ColorId, d->widget->foundMark, &d->inFoundMark); 866 fillRange_DrawContext_(d, run, teal_ColorId, d->widget->foundMark, &d->inFoundMark);
862 fillRange_DrawContext_(d, run, brown_ColorId, d->widget->selectMark, &d->inSelectMark); 867 fillRange_DrawContext_(d, run, brown_ColorId, d->widget->selectMark, &d->inSelectMark);
863 if (run->linkId && !isEmpty_Rect(run->bounds)) { 868 if (run->linkId && !isEmpty_Rect(run->bounds)) {
864 const int flags = linkFlags_GmDocument(doc, run->linkId); 869// const int flags = linkFlags_GmDocument(doc, run->linkId);
865 fg = /*flags & visited_GmLinkFlag ? gray88_ColorId :*/ white_ColorId; 870 fg = /*flags & visited_GmLinkFlag ? gray88_ColorId :*/ white_ColorId;
866 if (isHover || linkFlags_GmDocument(doc, run->linkId) & content_GmLinkFlag) { 871 if (isHover || linkFlags_GmDocument(doc, run->linkId) & content_GmLinkFlag) {
867 fg = linkColor_GmDocument(doc, run->linkId); 872 fg = linkColor_GmDocument(doc, run->linkId);
@@ -913,7 +918,7 @@ static void drawRun_DrawContext_(void *context, const iGmRun *run) {
913 init_Url(&parts, url); 918 init_Url(&parts, url);
914 const iString *host = collect_String(newRange_String(parts.host)); 919 const iString *host = collect_String(newRange_String(parts.host));
915 fg = linkColor_GmDocument(doc, linkId); 920 fg = linkColor_GmDocument(doc, linkId);
916 const iBool showHost = (!isEmpty_String(host) && flags & userFriendly_GmLinkFlag); 921 const iBool showHost = (!isEmpty_String(host) && flags & userFriendly_GmLinkFlag);
917 const iBool showImage = (flags & imageFileExtension_GmLinkFlag) != 0; 922 const iBool showImage = (flags & imageFileExtension_GmLinkFlag) != 0;
918 const iBool showAudio = (flags & audioFileExtension_GmLinkFlag) != 0; 923 const iBool showAudio = (flags & audioFileExtension_GmLinkFlag) != 0;
919 iString str; 924 iString str;