summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jaakko Keränen <jaakko.keranen@iki.fi>, 2021-03-16 12:35:49 +0200
committer: Jaakko Keränen <jaakko.keranen@iki.fi>, 2021-03-16 12:35:49 +0200
commit: ce1ac1ba801d6f09ec028838d5fed863ffce6a9a (patch)
tree: 7e7e25f9480130f8f479efe5a0396c41eadb6a0f
parent: ee495092d59f5d5cda9491ebdaa7ef7f107341a3 (diff)
Translation: More reliable markup preservation
-rw-r--r-- src/gmdocument.c 22
-rw-r--r-- src/gmdocument.h 15
-rw-r--r-- src/ui/translation.c 144
-rw-r--r-- src/ui/translation.h 2
4 files changed, 138 insertions, 45 deletions
diff --git a/src/gmdocument.c b/src/gmdocument.c
index bade99ce..2c01290e 100644
--- a/src/gmdocument.c
+++ b/src/gmdocument.c
@@ -89,22 +89,14 @@ struct Impl_GmDocument {
89 89
90iDefineObjectConstruction(GmDocument) 90iDefineObjectConstruction(GmDocument)
91 91
92enum iGmLineType {
93 text_GmLineType,
94 bullet_GmLineType,
95 preformatted_GmLineType,
96 quote_GmLineType,
97 heading1_GmLineType,
98 heading2_GmLineType,
99 heading3_GmLineType,
100 link_GmLineType,
101 max_GmLineType,
102};
103
104static enum iGmLineType lineType_GmDocument_(const iGmDocument *d, const iRangecc line) { 92static enum iGmLineType lineType_GmDocument_(const iGmDocument *d, const iRangecc line) {
105 if (d->format == plainText_GmDocumentFormat) { 93 if (d->format == plainText_GmDocumentFormat) {
106 return text_GmLineType; 94 return text_GmLineType;
107 } 95 }
96 return lineType_Rangecc(line);
97}
98
99enum iGmLineType lineType_Rangecc(const iRangecc line) {
108 if (isEmpty_Range(&line)) { 100 if (isEmpty_Range(&line)) {
109 return text_GmLineType; 101 return text_GmLineType;
110 } 102 }
@@ -132,7 +124,7 @@ static enum iGmLineType lineType_GmDocument_(const iGmDocument *d, const iRangec
132 return text_GmLineType; 124 return text_GmLineType;
133} 125}
134 126
135static void trimLine_Rangecc_(iRangecc *line, enum iGmLineType type, iBool normalize) { 127void trimLine_Rangecc(iRangecc *line, enum iGmLineType type, iBool normalize) {
136 static const unsigned int skip[max_GmLineType] = { 0, 2, 3, 1, 1, 2, 3, 0 }; 128 static const unsigned int skip[max_GmLineType] = { 0, 2, 3, 1, 1, 2, 3, 0 };
137 line->start += skip[type]; 129 line->start += skip[type];
138 if (normalize || (type >= heading1_GmLineType && type <= heading3_GmLineType)) { 130 if (normalize || (type >= heading1_GmLineType && type <= heading3_GmLineType)) {
@@ -400,7 +392,7 @@ static void doLayout_GmDocument_(iGmDocument *d) {
400 d->size.x /*- indents[preformatted_GmLineType] * gap_Text*/) { 392 d->size.x /*- indents[preformatted_GmLineType] * gap_Text*/) {
401 preFont = preformattedSmall_FontId; 393 preFont = preformattedSmall_FontId;
402 } 394 }
403 trimLine_Rangecc_(&line, type, isNormalized); 395 trimLine_Rangecc(&line, type, isNormalized);
404 preAltText = line; 396 preAltText = line;
405 /* TODO: store and link the alt text to this run */ 397 /* TODO: store and link the alt text to this run */
406 continue; 398 continue;
@@ -412,7 +404,7 @@ static void doLayout_GmDocument_(iGmDocument *d) {
412 type = text_GmLineType; 404 type = text_GmLineType;
413 } 405 }
414 } 406 }
415 trimLine_Rangecc_(&line, type, isNormalized); 407 trimLine_Rangecc(&line, type, isNormalized);
416 run.font = fonts[type]; 408 run.font = fonts[type];
417 /* Remember headings for the document outline. */ 409 /* Remember headings for the document outline. */
418 if (type == heading1_GmLineType || type == heading2_GmLineType || type == heading3_GmLineType) { 410 if (type == heading1_GmLineType || type == heading2_GmLineType || type == heading3_GmLineType) {
diff --git a/src/gmdocument.h b/src/gmdocument.h
index 98eef152..05c6da09 100644
--- a/src/gmdocument.h
+++ b/src/gmdocument.h
@@ -34,6 +34,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
34iDeclareType(GmHeading) 34iDeclareType(GmHeading)
35iDeclareType(GmRun) 35iDeclareType(GmRun)
36 36
37enum iGmLineType {
38 text_GmLineType,
39 bullet_GmLineType,
40 preformatted_GmLineType,
41 quote_GmLineType,
42 heading1_GmLineType,
43 heading2_GmLineType,
44 heading3_GmLineType,
45 link_GmLineType,
46 max_GmLineType,
47};
48
49enum iGmLineType lineType_Rangecc (const iRangecc line);
50void trimLine_Rangecc (iRangecc *line, enum iGmLineType type, iBool normalize);
51
37enum iGmDocumentTheme { 52enum iGmDocumentTheme {
38 colorfulDark_GmDocumentTheme, 53 colorfulDark_GmDocumentTheme,
39 colorfulLight_GmDocumentTheme, 54 colorfulLight_GmDocumentTheme,
diff --git a/src/ui/translation.c b/src/ui/translation.c
index 63d64d1b..e5059f38 100644
--- a/src/ui/translation.c
+++ b/src/ui/translation.c
@@ -111,7 +111,7 @@ static void draw_TranslationProgressWidget_(const iTranslationProgressWidget *d)
111 const iInt2 mid = mid_Rect(bounds); 111 const iInt2 mid = mid_Rect(bounds);
112 SDL_SetRenderDrawBlendMode(renderer_Window(get_Window()), SDL_BLENDMODE_BLEND); 112 SDL_SetRenderDrawBlendMode(renderer_Window(get_Window()), SDL_BLENDMODE_BLEND);
113 iConstForEach(Array, i, &d->sprites) { 113 iConstForEach(Array, i, &d->sprites) {
114 const int index = index_ArrayConstIterator(&i); 114 const int index = (int) index_ArrayConstIterator(&i);
115 const float angle = (float) index; 115 const float angle = (float) index;
116 const iSprite *spr = i.value; 116 const iSprite *spr = i.value;
117 const float opacity = iClamp(t - index * 0.5f, 0.0, 1.0f); 117 const float opacity = iClamp(t - index * 0.5f, 0.0, 1.0f);
@@ -151,12 +151,12 @@ iDefineTypeConstructionArgs(Translation, (iDocumentWidget *doc), doc)
151static const char * translationServiceHost = "xlt.skyjake.fi"; 151static const char * translationServiceHost = "xlt.skyjake.fi";
152static const uint16_t translationServicePort = 443; 152static const uint16_t translationServicePort = 443;
153 153
154static const char *doubleArrowSymbol = "\u20e2"; /* prevent getting mangled */ 154//static const char *doubleArrowSymbol = "\U0001f192"; //"\u20e2"; /* prevent getting mangled */
155static const char *tripleBacktickSymbol = "\u20e3"; 155//static const char *tripleBacktickSymbol = "\U0001f1a9"; //\u20e3";
156static const char *h1Symbol = "\u20e4"; 156//static const char *h1Symbol = "\U0001f19d";
157static const char *h2Symbol = "\u20e5"; 157//static const char *h2Symbol = "\U0001f19e";
158static const char *h3Symbol = "\u20e6"; 158//static const char *h3Symbol = "\U0001f19f";
159static const char *bulletSymbol = "\n\u20e7"; 159//static const char *bulletSymbol = "\n\U0001f196";
160 160
161static iString *quote_String_(const iString *d) { 161static iString *quote_String_(const iString *d) {
162 iString *quot = new_String(); 162 iString *quot = new_String();
@@ -177,8 +177,19 @@ static iString *quote_String_(const iString *d) {
177 else if (ch == '\t') { 177 else if (ch == '\t') {
178 appendCStr_String(quot, "\\t"); 178 appendCStr_String(quot, "\\t");
179 } 179 }
180 else if (ch >= 0x100) { 180 else if (ch >= 0x80) {
181 appendFormat_String(quot, "\\u%04x", ch); 181 if ((ch >= 0xD800 && ch < 0xE000) || ch >= 0x10000) {
182 /* TODO: Add a helper function? */
183 /* UTF-16 surrogate pair */
184 iString *chs = newUnicodeN_String(&ch, 1);
185 iBlock *u16 = toUtf16_String(chs);
186 delete_String(chs);
187 const uint16_t *ch16 = constData_Block(u16);
188 appendFormat_String(quot, "\\u%04x\\u%04x", ch16[0], ch16[1]);
189 }
190 else {
191 appendFormat_String(quot, "\\u%04x", ch);
192 }
182 } 193 }
183 else { 194 else {
184 appendChar_String(quot, ch); 195 appendChar_String(quot, ch);
@@ -212,13 +223,27 @@ static iString *unquote_String_(const iString *d) {
212 else if (esc == 'u') { 223 else if (esc == 'u') {
213 char digits[5]; 224 char digits[5];
214 iZap(digits); 225 iZap(digits);
215 iForIndices(j, digits) { 226 for (size_t j = 0; j < 4; j++) {
216 next_StringConstIterator(&i); 227 next_StringConstIterator(&i);
217 digits[j] = *i.pos; 228 digits[j] = *i.pos;
218 } 229 }
219 iChar codepoint = strtoul(digits, NULL, 16); 230 uint16_t ch16[2] = { strtoul(digits, NULL, 16), 0 };
220 if (codepoint) { 231 if (ch16[0] < 0xD800 || ch16[0] >= 0xE000) {
221 appendChar_String(unquot, codepoint); 232 appendChar_String(unquot, ch16[0]);
233 }
234 else {
235 /* UTF-16 surrogate pair */
236 next_StringConstIterator(&i);
237 next_StringConstIterator(&i);
238 iZap(digits);
239 for (size_t j = 0; j < 4; j++) {
240 next_StringConstIterator(&i);
241 digits[j] = *i.pos;
242 }
243 ch16[1] = strtoul(digits, NULL, 16);
244 iString *u16 = newUtf16N_String(ch16, 2);
245 append_String(unquot, u16);
246 delete_String(u16);
222 } 247 }
223 } 248 }
224 else { 249 else {
@@ -243,6 +268,7 @@ void init_Translation(iTranslation *d, iDocumentWidget *doc) {
243 d->doc = doc; /* owner */ 268 d->doc = doc; /* owner */
244 d->request = new_TlsRequest(); 269 d->request = new_TlsRequest();
245 d->timer = 0; 270 d->timer = 0;
271 init_Array(&d->lineTypes, sizeof(int));
246 setUserData_Object(d->request, d->doc); 272 setUserData_Object(d->request, d->doc);
247 setHost_TlsRequest(d->request, 273 setHost_TlsRequest(d->request,
248 collectNewCStr_String(translationServiceHost), 274 collectNewCStr_String(translationServiceHost),
@@ -257,6 +283,7 @@ void deinit_Translation(iTranslation *d) {
257 cancel_TlsRequest(d->request); 283 cancel_TlsRequest(d->request);
258 iRelease(d->request); 284 iRelease(d->request);
259 destroy_Widget(d->dlg); 285 destroy_Widget(d->dlg);
286 deinit_Array(&d->lineTypes);
260} 287}
261 288
262static uint32_t animate_Translation_(uint32_t interval, iAny *ptr) { 289static uint32_t animate_Translation_(uint32_t interval, iAny *ptr) {
@@ -270,13 +297,32 @@ void submit_Translation(iTranslation *d) {
270 const char *idTo = languageId_String(text_LabelWidget(findChild_Widget(d->dlg, "xlt.to"))); 297 const char *idTo = languageId_String(text_LabelWidget(findChild_Widget(d->dlg, "xlt.to")));
271 iAssert(status_TlsRequest(d->request) != submitted_TlsRequestStatus); 298 iAssert(status_TlsRequest(d->request) != submitted_TlsRequestStatus);
272 iBlock *json = collect_Block(new_Block(0)); 299 iBlock *json = collect_Block(new_Block(0));
273 iString *docSrc = collect_String(copy_String(source_GmDocument(document_DocumentWidget(d->doc)))); 300 iString *docSrc = collectNew_String();
274 replace_String(docSrc, "=>", doubleArrowSymbol); 301 /* TODO: Strip all markup and remember it. These are reapplied when reading response. */ {
275 replace_String(docSrc, "```", tripleBacktickSymbol); 302 iRangecc line = iNullRange;
276 replace_String(docSrc, "###", h3Symbol); 303 while (nextSplit_Rangecc(
277 replace_String(docSrc, "##", h2Symbol); 304 range_String(source_GmDocument(document_DocumentWidget(d->doc))), "\n", &line)) {
278 replace_String(docSrc, "#", h1Symbol); 305 iRangecc cleanLine = trimmed_Rangecc(line);
279 replace_String(docSrc, "\n*", bulletSymbol); 306 const int lineType = lineType_Rangecc(cleanLine);
307 pushBack_Array(&d->lineTypes, &lineType);
308 if (lineType == link_GmLineType) {
309 cleanLine.start += 2; /* skip over the => */
310 }
311 else {
312 trimLine_Rangecc(&cleanLine, lineType, iTrue); /* removes the prefix */
313 }
314 if (!isEmpty_String(docSrc)) {
315 appendCStr_String(docSrc, "\n");
316 }
317 appendRange_String(docSrc, cleanLine);
318 }
319 }
320// replace_String(docSrc, "=>", doubleArrowSymbol);
321// replace_String(docSrc, "```", tripleBacktickSymbol);
322// replace_String(docSrc, "###", h3Symbol);
323// replace_String(docSrc, "##", h2Symbol);
324// replace_String(docSrc, "#", h1Symbol);
325// replace_String(docSrc, "\n*", bulletSymbol);
280 printf_Block(json, 326 printf_Block(json,
281 "{\"q\":\"%s\",\"source\":\"%s\",\"target\":\"%s\"}", 327 "{\"q\":\"%s\",\"source\":\"%s\",\"target\":\"%s\"}",
282 cstrCollect_String(quote_String_(docSrc)), 328 cstrCollect_String(quote_String_(docSrc)),
@@ -286,7 +332,7 @@ void submit_Translation(iTranslation *d) {
286 printf_Block(msg, "POST /translate HTTP/1.1\r\n" 332 printf_Block(msg, "POST /translate HTTP/1.1\r\n"
287 "Host: xlt.skyjake.fi\r\n" 333 "Host: xlt.skyjake.fi\r\n"
288 "Connection: close\r\n" 334 "Connection: close\r\n"
289 "Content-Type: application/json\r\n" 335 "Content-Type: application/json; charset=utf-8\r\n"
290 "Content-Length: %zu\r\n\r\n", size_Block(json)); 336 "Content-Length: %zu\r\n\r\n", size_Block(json));
291 append_Block(msg, json); 337 append_Block(msg, json);
292 setContent_TlsRequest(d->request, msg); 338 setContent_TlsRequest(d->request, msg);
@@ -310,20 +356,58 @@ static iBool processResult_Translation_(iTranslation *d) {
310 return iFalse; 356 return iFalse;
311 } 357 }
312 iBlock *resultData = collect_Block(readAll_TlsRequest(d->request)); 358 iBlock *resultData = collect_Block(readAll_TlsRequest(d->request));
313// printf("result(%zu):\n%s\n", size_Block(resultData), cstr_Block(resultData)); 359 printf("result(%zu):\n%s\n", size_Block(resultData), cstr_Block(resultData));
314// fflush(stdout); 360 fflush(stdout);
315 iRegExp *pattern = iClob(new_RegExp(".*translatedText\":\"(.*)\"\\}", caseSensitive_RegExpOption)); 361 iRegExp *pattern = iClob(new_RegExp(".*translatedText\":\"(.*)\"\\}", caseSensitive_RegExpOption));
316 iRegExpMatch m; 362 iRegExpMatch m;
317 init_RegExpMatch(&m); 363 init_RegExpMatch(&m);
318 if (matchRange_RegExp(pattern, range_Block(resultData), &m)) { 364 if (matchRange_RegExp(pattern, range_Block(resultData), &m)) {
319 iString *translation = unquote_String_(collect_String(captured_RegExpMatch(&m, 1))); 365 iString *translation = unquote_String_(collect_String(captured_RegExpMatch(&m, 1)));
320 replace_String(translation, tripleBacktickSymbol, "```"); 366 iString *marked = collectNew_String();
321 replace_String(translation, doubleArrowSymbol, "=>"); 367 iRangecc line;
322 replace_String(translation, h3Symbol, "### "); 368 size_t lineIndex = 0;
323 replace_String(translation, h2Symbol, "## "); 369 while (nextSplit_Rangecc(range_String(translation), "\n", &line)) {
324 replace_String(translation, h1Symbol, "# "); 370 iRangecc cleanLine = trimmed_Rangecc(line);
325 replace_String(translation, bulletSymbol, "\n* "); 371 if (!isEmpty_String(marked)) {
326 setSource_DocumentWidget(d->doc, translation); 372 appendCStr_String(marked, "\n");
373 }
374 if (lineIndex < size_Array(&d->lineTypes)) {
375 switch (value_Array(&d->lineTypes, lineIndex, int)) {
376 case bullet_GmLineType:
377 appendCStr_String(marked, "* ");
378 break;
379 case link_GmLineType:
380 appendCStr_String(marked, "=> ");
381 break;
382 case quote_GmLineType:
383 appendCStr_String(marked, "> ");
384 break;
385 case preformatted_GmLineType:
386 appendCStr_String(marked, "```");
387 break;
388 case heading1_GmLineType:
389 appendCStr_String(marked, "# ");
390 break;
391 case heading2_GmLineType:
392 appendCStr_String(marked, "## ");
393 break;
394 case heading3_GmLineType:
395 appendCStr_String(marked, "### ");
396 break;
397 default:
398 break;
399 }
400 }
401 appendRange_String(marked, cleanLine);
402 lineIndex++;
403 }
404// replace_String(translation, tripleBacktickSymbol, "```");
405// replace_String(translation, doubleArrowSymbol, "=>");
406// replace_String(translation, h3Symbol, "### ");
407// replace_String(translation, h2Symbol, "## ");
408// replace_String(translation, h1Symbol, "# ");
409// replace_String(translation, bulletSymbol, "\n* ");
410 setSource_DocumentWidget(d->doc, marked);
327 postCommand_App("sidebar.update"); 411 postCommand_App("sidebar.update");
328 delete_String(translation); 412 delete_String(translation);
329 } 413 }
diff --git a/src/ui/translation.h b/src/ui/translation.h
index 6966e468..d6506415 100644
--- a/src/ui/translation.h
+++ b/src/ui/translation.h
@@ -22,6 +22,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
22 22
23#pragma once 23#pragma once
24 24
25#include <the_Foundation/array.h>
25#include <the_Foundation/string.h> 26#include <the_Foundation/string.h>
26#include <the_Foundation/tlsrequest.h> 27#include <the_Foundation/tlsrequest.h>
27 28
@@ -35,6 +36,7 @@ struct Impl_Translation {
35 uint32_t startTime; 36 uint32_t startTime;
36 iDocumentWidget *doc; 37 iDocumentWidget *doc;
37 int timer; 38 int timer;
39 iArray lineTypes;
38}; 40};
39 41
40iDeclareTypeConstructionArgs(Translation, iDocumentWidget *) 42iDeclareTypeConstructionArgs(Translation, iDocumentWidget *)