summaryrefslogtreecommitdiff
path: root/src/gmutil.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gmutil.c')
-rw-r--r--src/gmutil.c103
1 files changed, 70 insertions, 33 deletions
diff --git a/src/gmutil.c b/src/gmutil.c
index 2138caa3..131734b2 100644
--- a/src/gmutil.c
+++ b/src/gmutil.c
@@ -1,3 +1,25 @@
1/* Copyright 2020 Jaakko Keränen <jaakko.keranen@iki.fi>
2
3Redistribution and use in source and binary forms, with or without
4modification, are permitted provided that the following conditions are met:
5
61. Redistributions of source code must retain the above copyright notice, this
7 list of conditions and the following disclaimer.
82. Redistributions in binary form must reproduce the above copyright notice,
9 this list of conditions and the following disclaimer in the documentation
10 and/or other materials provided with the distribution.
11
12THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
13ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
16ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
19ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
22
1#include "gmutil.h" 23#include "gmutil.h"
2 24
3#include <the_Foundation/regexp.h> 25#include <the_Foundation/regexp.h>
@@ -5,12 +27,17 @@
5#include <the_Foundation/path.h> 27#include <the_Foundation/path.h>
6 28
7void init_Url(iUrl *d, const iString *text) { 29void init_Url(iUrl *d, const iString *text) {
8 iRegExp *absPat = 30 static iRegExp *absoluteUrlPattern_;
9 new_RegExp("([a-z]+:)?(//[^/:?]*)(:[0-9]+)?([^?]*)(\\?.*)?", caseInsensitive_RegExpOption); 31 static iRegExp *relativeUrlPattern_;
32 if (!absoluteUrlPattern_) {
33 absoluteUrlPattern_ = new_RegExp("([a-z]+:)?(//[^/:?]*)(:[0-9]+)?([^?]*)(\\?.*)?",
34 caseInsensitive_RegExpOption);
35 }
10 iRegExpMatch m; 36 iRegExpMatch m;
11 if (matchString_RegExp(absPat, text, &m)) { 37 init_RegExpMatch(&m);
12 d->protocol = capturedRange_RegExpMatch(&m, 1); 38 if (matchString_RegExp(absoluteUrlPattern_, text, &m)) {
13 d->host = capturedRange_RegExpMatch(&m, 2); 39 d->scheme = capturedRange_RegExpMatch(&m, 1);
40 d->host = capturedRange_RegExpMatch(&m, 2);
14 if (!isEmpty_Range(&d->host)) { 41 if (!isEmpty_Range(&d->host)) {
15 d->host.start += 2; /* skip the double slash */ 42 d->host.start += 2; /* skip the double slash */
16 } 43 }
@@ -18,28 +45,28 @@ void init_Url(iUrl *d, const iString *text) {
18 if (!isEmpty_Range(&d->port)) { 45 if (!isEmpty_Range(&d->port)) {
19 d->port.start++; /* omit the colon */ 46 d->port.start++; /* omit the colon */
20 } 47 }
21 d->path = capturedRange_RegExpMatch(&m, 4); 48 d->path = capturedRange_RegExpMatch(&m, 4);
22 d->query = capturedRange_RegExpMatch(&m, 5); 49 d->query = capturedRange_RegExpMatch(&m, 5);
23 } 50 }
24 else { 51 else {
25 /* Must be a relative path. */ 52 /* Must be a relative path. */
26 iZap(*d); 53 iZap(*d);
27 iRegExp *relPat = new_RegExp("([a-z]+:)?([^?]*)(\\?.*)?", 0); 54 if (!relativeUrlPattern_) {
28 if (matchString_RegExp(relPat, text, &m)) { 55 relativeUrlPattern_ = new_RegExp("([a-z]+:)?([^?]*)(\\?.*)?", 0);
29 d->protocol = capturedRange_RegExpMatch(&m, 1); 56 }
30 d->path = capturedRange_RegExpMatch(&m, 2); 57 if (matchString_RegExp(relativeUrlPattern_, text, &m)) {
31 d->query = capturedRange_RegExpMatch(&m, 3); 58 d->scheme = capturedRange_RegExpMatch(&m, 1);
59 d->path = capturedRange_RegExpMatch(&m, 2);
60 d->query = capturedRange_RegExpMatch(&m, 3);
32 } 61 }
33 iRelease(relPat);
34 } 62 }
35 iRelease(absPat); 63 if (!isEmpty_Range(&d->scheme)) {
36 if (!isEmpty_Range(&d->protocol)) { 64 d->scheme.end--; /* omit the colon */
37 d->protocol.end--; /* omit the colon */
38 } 65 }
39} 66}
40 67
41static iRangecc dirPath_(iRangecc path) { 68static iRangecc dirPath_(iRangecc path) {
42 const size_t pos = lastIndexOfCStr_Rangecc(&path, "/"); 69 const size_t pos = lastIndexOfCStr_Rangecc(path, "/");
43 if (pos == iInvalidPos) return path; 70 if (pos == iInvalidPos) return path;
44 return (iRangecc){ path.start, path.start + pos }; 71 return (iRangecc){ path.start, path.start + pos };
45} 72}
@@ -62,13 +89,13 @@ void cleanUrlPath_String(iString *d) {
62 iUrl parts; 89 iUrl parts;
63 init_Url(&parts, d); 90 init_Url(&parts, d);
64 iRangecc seg = iNullRange; 91 iRangecc seg = iNullRange;
65 while (nextSplit_Rangecc(&parts.path, "/", &seg)) { 92 while (nextSplit_Rangecc(parts.path, "/", &seg)) {
66 if (equal_Rangecc(&seg, "..")) { 93 if (equal_Rangecc(seg, "..")) {
67 /* Back up one segment. */ 94 /* Back up one segment. */
68 iRangecc last = prevPathSeg_(constEnd_String(&clean), constBegin_String(&clean)); 95 iRangecc last = prevPathSeg_(constEnd_String(&clean), constBegin_String(&clean));
69 truncate_Block(&clean.chars, last.start - constBegin_String(&clean)); 96 truncate_Block(&clean.chars, last.start - constBegin_String(&clean));
70 } 97 }
71 else if (equal_Rangecc(&seg, ".")) { 98 else if (equal_Rangecc(seg, ".")) {
72 /* Skip it. */ 99 /* Skip it. */
73 } 100 }
74 else { 101 else {
@@ -76,11 +103,11 @@ void cleanUrlPath_String(iString *d) {
76 appendRange_String(&clean, seg); 103 appendRange_String(&clean, seg);
77 } 104 }
78 } 105 }
79 if (endsWith_Rangecc(&parts.path, "/")) { 106 if (endsWith_Rangecc(parts.path, "/")) {
80 appendCStr_String(&clean, "/"); 107 appendCStr_String(&clean, "/");
81 } 108 }
82 /* Replace with the new path. */ 109 /* Replace with the new path. */
83 if (cmpCStrNSc_Rangecc(&parts.path, cstr_String(&clean), size_String(&clean), &iCaseSensitive)) { 110 if (cmpCStrNSc_Rangecc(parts.path, cstr_String(&clean), size_String(&clean), &iCaseSensitive)) {
84 const size_t pos = parts.path.start - constBegin_String(d); 111 const size_t pos = parts.path.start - constBegin_String(d);
85 remove_Block(&d->chars, pos, size_Range(&parts.path)); 112 remove_Block(&d->chars, pos, size_Range(&parts.path));
86 insertData_Block(&d->chars, pos, cstr_String(&clean), size_String(&clean)); 113 insertData_Block(&d->chars, pos, cstr_String(&clean), size_String(&clean));
@@ -88,10 +115,10 @@ void cleanUrlPath_String(iString *d) {
88 deinit_String(&clean); 115 deinit_String(&clean);
89} 116}
90 117
91iRangecc urlProtocol_String(const iString *d) { 118iRangecc urlScheme_String(const iString *d) {
92 iUrl url; 119 iUrl url;
93 init_Url(&url, d); 120 init_Url(&url, d);
94 return url.protocol; 121 return url.scheme;
95} 122}
96 123
97iRangecc urlHost_String(const iString *d) { 124iRangecc urlHost_String(const iString *d) {
@@ -105,20 +132,20 @@ const iString *absoluteUrl_String(const iString *d, const iString *urlMaybeRelat
105 iUrl rel; 132 iUrl rel;
106 init_Url(&orig, d); 133 init_Url(&orig, d);
107 init_Url(&rel, urlMaybeRelative); 134 init_Url(&rel, urlMaybeRelative);
108 if (equalCase_Rangecc(&rel.protocol, "data") || equalCase_Rangecc(&rel.protocol, "about")) { 135 if (equalCase_Rangecc(rel.scheme, "data") || equalCase_Rangecc(rel.scheme, "about")) {
109 /* Special case, the contents should be left unparsed. */ 136 /* Special case, the contents should be left unparsed. */
110 return urlMaybeRelative; 137 return urlMaybeRelative;
111 } 138 }
112 const iBool isRelative = !isDef_(rel.host); 139 const iBool isRelative = !isDef_(rel.host);
113 iRangecc protocol = range_CStr("gemini"); 140 iRangecc scheme = range_CStr("gemini");
114 if (isDef_(rel.protocol)) { 141 if (isDef_(rel.scheme)) {
115 protocol = rel.protocol; 142 scheme = rel.scheme;
116 } 143 }
117 else if (isRelative && isDef_(orig.protocol)) { 144 else if (isRelative && isDef_(orig.scheme)) {
118 protocol = orig.protocol; 145 scheme = orig.scheme;
119 } 146 }
120 iString *absolute = collectNew_String(); 147 iString *absolute = collectNew_String();
121 appendRange_String(absolute, protocol); 148 appendRange_String(absolute, scheme);
122 appendCStr_String(absolute, "://"); { 149 appendCStr_String(absolute, "://"); {
123 const iUrl *selHost = isDef_(rel.host) ? &rel : &orig; 150 const iUrl *selHost = isDef_(rel.host) ? &rel : &orig;
124 appendRange_String(absolute, selHost->host); 151 appendRange_String(absolute, selHost->host);
@@ -127,11 +154,11 @@ const iString *absoluteUrl_String(const iString *d, const iString *urlMaybeRelat
127 appendRange_String(absolute, selHost->port); 154 appendRange_String(absolute, selHost->port);
128 } 155 }
129 } 156 }
130 if (isDef_(rel.protocol) || isDef_(rel.host) || startsWith_Rangecc(&rel.path, "/")) { 157 if (isDef_(rel.scheme) || isDef_(rel.host) || startsWith_Rangecc(rel.path, "/")) {
131 appendRange_String(absolute, rel.path); /* absolute path */ 158 appendRange_String(absolute, isDef_(rel.path) ? rel.path : range_CStr("/")); /* absolute path */
132 } 159 }
133 else { 160 else {
134 if (!endsWith_Rangecc(&orig.path, "/")) { 161 if (!endsWith_Rangecc(orig.path, "/")) {
135 /* Referencing a file. */ 162 /* Referencing a file. */
136 appendRange_String(absolute, dirPath_(orig.path)); 163 appendRange_String(absolute, dirPath_(orig.path));
137 } 164 }
@@ -200,6 +227,16 @@ static const struct {
200 "Invalid Redirect", 227 "Invalid Redirect",
201 "The server responded with a redirect but did not provide a valid destination URL. " 228 "The server responded with a redirect but did not provide a valid destination URL. "
202 "Perhaps the server is malfunctioning." } }, 229 "Perhaps the server is malfunctioning." } },
230 { nonGeminiRedirect_GmStatusCode,
231 { 0x27a0, /* dashed arrow */
232 "Redirect to Non-Gemini URL",
233 "The server attempted to redirect us to a non-Gemini URL. Here is the link so you "
234 "can open it manually if appropriate."} },
235 { tooManyRedirects_GmStatusCode,
236 { 0x27a0, /* dashed arrow */
237 "Too Many Redirects",
238 "You may be stuck in a redirection loop. The next redirected URL is below if you "
239 "want to continue manually."} },
203 { temporaryFailure_GmStatusCode, 240 { temporaryFailure_GmStatusCode,
204 { 0x1f50c, /* electric plug */ 241 { 0x1f50c, /* electric plug */
205 "Temporary Failure", 242 "Temporary Failure",