diff options
Diffstat (limited to 'src/gmutil.c')
-rw-r--r-- | src/gmutil.c | 103 |
1 files changed, 70 insertions, 33 deletions
diff --git a/src/gmutil.c b/src/gmutil.c index 2138caa3..131734b2 100644 --- a/src/gmutil.c +++ b/src/gmutil.c | |||
@@ -1,3 +1,25 @@ | |||
1 | /* Copyright 2020 Jaakko Keränen <jaakko.keranen@iki.fi> | ||
2 | |||
3 | Redistribution and use in source and binary forms, with or without | ||
4 | modification, are permitted provided that the following conditions are met: | ||
5 | |||
6 | 1. Redistributions of source code must retain the above copyright notice, this | ||
7 | list of conditions and the following disclaimer. | ||
8 | 2. Redistributions in binary form must reproduce the above copyright notice, | ||
9 | this list of conditions and the following disclaimer in the documentation | ||
10 | and/or other materials provided with the distribution. | ||
11 | |||
12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
13 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
14 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
15 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | ||
16 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
17 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
18 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | ||
19 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
20 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ | ||
22 | |||
1 | #include "gmutil.h" | 23 | #include "gmutil.h" |
2 | 24 | ||
3 | #include <the_Foundation/regexp.h> | 25 | #include <the_Foundation/regexp.h> |
@@ -5,12 +27,17 @@ | |||
5 | #include <the_Foundation/path.h> | 27 | #include <the_Foundation/path.h> |
6 | 28 | ||
7 | void init_Url(iUrl *d, const iString *text) { | 29 | void init_Url(iUrl *d, const iString *text) { |
8 | iRegExp *absPat = | 30 | static iRegExp *absoluteUrlPattern_; |
9 | new_RegExp("([a-z]+:)?(//[^/:?]*)(:[0-9]+)?([^?]*)(\\?.*)?", caseInsensitive_RegExpOption); | 31 | static iRegExp *relativeUrlPattern_; |
32 | if (!absoluteUrlPattern_) { | ||
33 | absoluteUrlPattern_ = new_RegExp("([a-z]+:)?(//[^/:?]*)(:[0-9]+)?([^?]*)(\\?.*)?", | ||
34 | caseInsensitive_RegExpOption); | ||
35 | } | ||
10 | iRegExpMatch m; | 36 | iRegExpMatch m; |
11 | if (matchString_RegExp(absPat, text, &m)) { | 37 | init_RegExpMatch(&m); |
12 | d->protocol = capturedRange_RegExpMatch(&m, 1); | 38 | if (matchString_RegExp(absoluteUrlPattern_, text, &m)) { |
13 | d->host = capturedRange_RegExpMatch(&m, 2); | 39 | d->scheme = capturedRange_RegExpMatch(&m, 1); |
40 | d->host = capturedRange_RegExpMatch(&m, 2); | ||
14 | if (!isEmpty_Range(&d->host)) { | 41 | if (!isEmpty_Range(&d->host)) { |
15 | d->host.start += 2; /* skip the double slash */ | 42 | d->host.start += 2; /* skip the double slash */ |
16 | } | 43 | } |
@@ -18,28 +45,28 @@ void init_Url(iUrl *d, const iString *text) { | |||
18 | if (!isEmpty_Range(&d->port)) { | 45 | if (!isEmpty_Range(&d->port)) { |
19 | d->port.start++; /* omit the colon */ | 46 | d->port.start++; /* omit the colon */ |
20 | } | 47 | } |
21 | d->path = capturedRange_RegExpMatch(&m, 4); | 48 | d->path = capturedRange_RegExpMatch(&m, 4); |
22 | d->query = capturedRange_RegExpMatch(&m, 5); | 49 | d->query = capturedRange_RegExpMatch(&m, 5); |
23 | } | 50 | } |
24 | else { | 51 | else { |
25 | /* Must be a relative path. */ | 52 | /* Must be a relative path. */ |
26 | iZap(*d); | 53 | iZap(*d); |
27 | iRegExp *relPat = new_RegExp("([a-z]+:)?([^?]*)(\\?.*)?", 0); | 54 | if (!relativeUrlPattern_) { |
28 | if (matchString_RegExp(relPat, text, &m)) { | 55 | relativeUrlPattern_ = new_RegExp("([a-z]+:)?([^?]*)(\\?.*)?", 0); |
29 | d->protocol = capturedRange_RegExpMatch(&m, 1); | 56 | } |
30 | d->path = capturedRange_RegExpMatch(&m, 2); | 57 | if (matchString_RegExp(relativeUrlPattern_, text, &m)) { |
31 | d->query = capturedRange_RegExpMatch(&m, 3); | 58 | d->scheme = capturedRange_RegExpMatch(&m, 1); |
59 | d->path = capturedRange_RegExpMatch(&m, 2); | ||
60 | d->query = capturedRange_RegExpMatch(&m, 3); | ||
32 | } | 61 | } |
33 | iRelease(relPat); | ||
34 | } | 62 | } |
35 | iRelease(absPat); | 63 | if (!isEmpty_Range(&d->scheme)) { |
36 | if (!isEmpty_Range(&d->protocol)) { | 64 | d->scheme.end--; /* omit the colon */ |
37 | d->protocol.end--; /* omit the colon */ | ||
38 | } | 65 | } |
39 | } | 66 | } |
40 | 67 | ||
41 | static iRangecc dirPath_(iRangecc path) { | 68 | static iRangecc dirPath_(iRangecc path) { |
42 | const size_t pos = lastIndexOfCStr_Rangecc(&path, "/"); | 69 | const size_t pos = lastIndexOfCStr_Rangecc(path, "/"); |
43 | if (pos == iInvalidPos) return path; | 70 | if (pos == iInvalidPos) return path; |
44 | return (iRangecc){ path.start, path.start + pos }; | 71 | return (iRangecc){ path.start, path.start + pos }; |
45 | } | 72 | } |
@@ -62,13 +89,13 @@ void cleanUrlPath_String(iString *d) { | |||
62 | iUrl parts; | 89 | iUrl parts; |
63 | init_Url(&parts, d); | 90 | init_Url(&parts, d); |
64 | iRangecc seg = iNullRange; | 91 | iRangecc seg = iNullRange; |
65 | while (nextSplit_Rangecc(&parts.path, "/", &seg)) { | 92 | while (nextSplit_Rangecc(parts.path, "/", &seg)) { |
66 | if (equal_Rangecc(&seg, "..")) { | 93 | if (equal_Rangecc(seg, "..")) { |
67 | /* Back up one segment. */ | 94 | /* Back up one segment. */ |
68 | iRangecc last = prevPathSeg_(constEnd_String(&clean), constBegin_String(&clean)); | 95 | iRangecc last = prevPathSeg_(constEnd_String(&clean), constBegin_String(&clean)); |
69 | truncate_Block(&clean.chars, last.start - constBegin_String(&clean)); | 96 | truncate_Block(&clean.chars, last.start - constBegin_String(&clean)); |
70 | } | 97 | } |
71 | else if (equal_Rangecc(&seg, ".")) { | 98 | else if (equal_Rangecc(seg, ".")) { |
72 | /* Skip it. */ | 99 | /* Skip it. */ |
73 | } | 100 | } |
74 | else { | 101 | else { |
@@ -76,11 +103,11 @@ void cleanUrlPath_String(iString *d) { | |||
76 | appendRange_String(&clean, seg); | 103 | appendRange_String(&clean, seg); |
77 | } | 104 | } |
78 | } | 105 | } |
79 | if (endsWith_Rangecc(&parts.path, "/")) { | 106 | if (endsWith_Rangecc(parts.path, "/")) { |
80 | appendCStr_String(&clean, "/"); | 107 | appendCStr_String(&clean, "/"); |
81 | } | 108 | } |
82 | /* Replace with the new path. */ | 109 | /* Replace with the new path. */ |
83 | if (cmpCStrNSc_Rangecc(&parts.path, cstr_String(&clean), size_String(&clean), &iCaseSensitive)) { | 110 | if (cmpCStrNSc_Rangecc(parts.path, cstr_String(&clean), size_String(&clean), &iCaseSensitive)) { |
84 | const size_t pos = parts.path.start - constBegin_String(d); | 111 | const size_t pos = parts.path.start - constBegin_String(d); |
85 | remove_Block(&d->chars, pos, size_Range(&parts.path)); | 112 | remove_Block(&d->chars, pos, size_Range(&parts.path)); |
86 | insertData_Block(&d->chars, pos, cstr_String(&clean), size_String(&clean)); | 113 | insertData_Block(&d->chars, pos, cstr_String(&clean), size_String(&clean)); |
@@ -88,10 +115,10 @@ void cleanUrlPath_String(iString *d) { | |||
88 | deinit_String(&clean); | 115 | deinit_String(&clean); |
89 | } | 116 | } |
90 | 117 | ||
91 | iRangecc urlProtocol_String(const iString *d) { | 118 | iRangecc urlScheme_String(const iString *d) { |
92 | iUrl url; | 119 | iUrl url; |
93 | init_Url(&url, d); | 120 | init_Url(&url, d); |
94 | return url.protocol; | 121 | return url.scheme; |
95 | } | 122 | } |
96 | 123 | ||
97 | iRangecc urlHost_String(const iString *d) { | 124 | iRangecc urlHost_String(const iString *d) { |
@@ -105,20 +132,20 @@ const iString *absoluteUrl_String(const iString *d, const iString *urlMaybeRelat | |||
105 | iUrl rel; | 132 | iUrl rel; |
106 | init_Url(&orig, d); | 133 | init_Url(&orig, d); |
107 | init_Url(&rel, urlMaybeRelative); | 134 | init_Url(&rel, urlMaybeRelative); |
108 | if (equalCase_Rangecc(&rel.protocol, "data") || equalCase_Rangecc(&rel.protocol, "about")) { | 135 | if (equalCase_Rangecc(rel.scheme, "data") || equalCase_Rangecc(rel.scheme, "about")) { |
109 | /* Special case, the contents should be left unparsed. */ | 136 | /* Special case, the contents should be left unparsed. */ |
110 | return urlMaybeRelative; | 137 | return urlMaybeRelative; |
111 | } | 138 | } |
112 | const iBool isRelative = !isDef_(rel.host); | 139 | const iBool isRelative = !isDef_(rel.host); |
113 | iRangecc protocol = range_CStr("gemini"); | 140 | iRangecc scheme = range_CStr("gemini"); |
114 | if (isDef_(rel.protocol)) { | 141 | if (isDef_(rel.scheme)) { |
115 | protocol = rel.protocol; | 142 | scheme = rel.scheme; |
116 | } | 143 | } |
117 | else if (isRelative && isDef_(orig.protocol)) { | 144 | else if (isRelative && isDef_(orig.scheme)) { |
118 | protocol = orig.protocol; | 145 | scheme = orig.scheme; |
119 | } | 146 | } |
120 | iString *absolute = collectNew_String(); | 147 | iString *absolute = collectNew_String(); |
121 | appendRange_String(absolute, protocol); | 148 | appendRange_String(absolute, scheme); |
122 | appendCStr_String(absolute, "://"); { | 149 | appendCStr_String(absolute, "://"); { |
123 | const iUrl *selHost = isDef_(rel.host) ? &rel : &orig; | 150 | const iUrl *selHost = isDef_(rel.host) ? &rel : &orig; |
124 | appendRange_String(absolute, selHost->host); | 151 | appendRange_String(absolute, selHost->host); |
@@ -127,11 +154,11 @@ const iString *absoluteUrl_String(const iString *d, const iString *urlMaybeRelat | |||
127 | appendRange_String(absolute, selHost->port); | 154 | appendRange_String(absolute, selHost->port); |
128 | } | 155 | } |
129 | } | 156 | } |
130 | if (isDef_(rel.protocol) || isDef_(rel.host) || startsWith_Rangecc(&rel.path, "/")) { | 157 | if (isDef_(rel.scheme) || isDef_(rel.host) || startsWith_Rangecc(rel.path, "/")) { |
131 | appendRange_String(absolute, rel.path); /* absolute path */ | 158 | appendRange_String(absolute, isDef_(rel.path) ? rel.path : range_CStr("/")); /* absolute path */ |
132 | } | 159 | } |
133 | else { | 160 | else { |
134 | if (!endsWith_Rangecc(&orig.path, "/")) { | 161 | if (!endsWith_Rangecc(orig.path, "/")) { |
135 | /* Referencing a file. */ | 162 | /* Referencing a file. */ |
136 | appendRange_String(absolute, dirPath_(orig.path)); | 163 | appendRange_String(absolute, dirPath_(orig.path)); |
137 | } | 164 | } |
@@ -200,6 +227,16 @@ static const struct { | |||
200 | "Invalid Redirect", | 227 | "Invalid Redirect", |
201 | "The server responded with a redirect but did not provide a valid destination URL. " | 228 | "The server responded with a redirect but did not provide a valid destination URL. " |
202 | "Perhaps the server is malfunctioning." } }, | 229 | "Perhaps the server is malfunctioning." } }, |
230 | { nonGeminiRedirect_GmStatusCode, | ||
231 | { 0x27a0, /* dashed arrow */ | ||
232 | "Redirect to Non-Gemini URL", | ||
233 | "The server attempted to redirect us to a non-Gemini URL. Here is the link so you " | ||
234 | "can open it manually if appropriate."} }, | ||
235 | { tooManyRedirects_GmStatusCode, | ||
236 | { 0x27a0, /* dashed arrow */ | ||
237 | "Too Many Redirects", | ||
238 | "You may be stuck in a redirection loop. The next redirected URL is below if you " | ||
239 | "want to continue manually."} }, | ||
203 | { temporaryFailure_GmStatusCode, | 240 | { temporaryFailure_GmStatusCode, |
204 | { 0x1f50c, /* electric plug */ | 241 | { 0x1f50c, /* electric plug */ |
205 | "Temporary Failure", | 242 | "Temporary Failure", |