From 73a721fc93c3be7b13361dea41d4431ad14a3fdd Mon Sep 17 00:00:00 2001 From: Jaakko Keränen Date: Wed, 30 Jun 2021 08:20:38 +0300 Subject: Canonical URIs Internally, all URIs should be converted to a canonical form so that they can be compared against each other. The canonical form is an IRI with spaces and reserved characters percent-encoded. --- src/gmdocument.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gmdocument.c') diff --git a/src/gmdocument.c b/src/gmdocument.c index f15d9d1d..8832271a 100644 --- a/src/gmdocument.c +++ b/src/gmdocument.c @@ -1554,13 +1554,14 @@ static void normalize_GmDocument(iGmDocument *d) { printf("wasNormalized: %d\n", wasNormalized); fflush(stdout); set_String(&d->source, collect_String(normalized)); - normalize_String(&d->source); /* NFC */ + //normalize_String(&d->source); /* NFC */ printf("orig:%zu norm:%zu\n", size_String(&d->unormSource), size_String(&d->source)); /* normalized source has an extra newline at the end */ // iAssert(wasNormalized || equal_String(&d->unormSource, &d->source)); } void setUrl_GmDocument(iGmDocument *d, const iString *url) { + url = canonicalUrl_String(url); set_String(&d->url, url); iUrl parts; init_Url(&parts, url); -- cgit v1.2.3