diff options
author | Jaakko Keränen <jaakko.keranen@iki.fi> | 2021-03-16 12:35:49 +0200 |
---|---|---|
committer | Jaakko Keränen <jaakko.keranen@iki.fi> | 2021-03-16 12:35:49 +0200 |
commit | ce1ac1ba801d6f09ec028838d5fed863ffce6a9a (patch) | |
tree | 7e7e25f9480130f8f479efe5a0396c41eadb6a0f /src/ui/translation.c | |
parent | ee495092d59f5d5cda9491ebdaa7ef7f107341a3 (diff) |
Translation: More reliable markup preservation
Diffstat (limited to 'src/ui/translation.c')
-rw-r--r-- | src/ui/translation.c | 144 |
1 file changed, 114 insertions, 30 deletions
diff --git a/src/ui/translation.c b/src/ui/translation.c index 63d64d1b..e5059f38 100644 --- a/src/ui/translation.c +++ b/src/ui/translation.c | |||
@@ -111,7 +111,7 @@ static void draw_TranslationProgressWidget_(const iTranslationProgressWidget *d) | |||
111 | const iInt2 mid = mid_Rect(bounds); | 111 | const iInt2 mid = mid_Rect(bounds); |
112 | SDL_SetRenderDrawBlendMode(renderer_Window(get_Window()), SDL_BLENDMODE_BLEND); | 112 | SDL_SetRenderDrawBlendMode(renderer_Window(get_Window()), SDL_BLENDMODE_BLEND); |
113 | iConstForEach(Array, i, &d->sprites) { | 113 | iConstForEach(Array, i, &d->sprites) { |
114 | const int index = index_ArrayConstIterator(&i); | 114 | const int index = (int) index_ArrayConstIterator(&i); |
115 | const float angle = (float) index; | 115 | const float angle = (float) index; |
116 | const iSprite *spr = i.value; | 116 | const iSprite *spr = i.value; |
117 | const float opacity = iClamp(t - index * 0.5f, 0.0, 1.0f); | 117 | const float opacity = iClamp(t - index * 0.5f, 0.0, 1.0f); |
@@ -151,12 +151,12 @@ iDefineTypeConstructionArgs(Translation, (iDocumentWidget *doc), doc) | |||
151 | static const char * translationServiceHost = "xlt.skyjake.fi"; | 151 | static const char * translationServiceHost = "xlt.skyjake.fi"; |
152 | static const uint16_t translationServicePort = 443; | 152 | static const uint16_t translationServicePort = 443; |
153 | 153 | ||
154 | static const char *doubleArrowSymbol = "\u20e2"; /* prevent getting mangled */ | 154 | //static const char *doubleArrowSymbol = "\U0001f192"; //"\u20e2"; /* prevent getting mangled */ |
155 | static const char *tripleBacktickSymbol = "\u20e3"; | 155 | //static const char *tripleBacktickSymbol = "\U0001f1a9"; //\u20e3"; |
156 | static const char *h1Symbol = "\u20e4"; | 156 | //static const char *h1Symbol = "\U0001f19d"; |
157 | static const char *h2Symbol = "\u20e5"; | 157 | //static const char *h2Symbol = "\U0001f19e"; |
158 | static const char *h3Symbol = "\u20e6"; | 158 | //static const char *h3Symbol = "\U0001f19f"; |
159 | static const char *bulletSymbol = "\n\u20e7"; | 159 | //static const char *bulletSymbol = "\n\U0001f196"; |
160 | 160 | ||
161 | static iString *quote_String_(const iString *d) { | 161 | static iString *quote_String_(const iString *d) { |
162 | iString *quot = new_String(); | 162 | iString *quot = new_String(); |
@@ -177,8 +177,19 @@ static iString *quote_String_(const iString *d) { | |||
177 | else if (ch == '\t') { | 177 | else if (ch == '\t') { |
178 | appendCStr_String(quot, "\\t"); | 178 | appendCStr_String(quot, "\\t"); |
179 | } | 179 | } |
180 | else if (ch >= 0x100) { | 180 | else if (ch >= 0x80) { |
181 | appendFormat_String(quot, "\\u%04x", ch); | 181 | if ((ch >= 0xD800 && ch < 0xE000) || ch >= 0x10000) { |
182 | /* TODO: Add a helper function? */ | ||
183 | /* UTF-16 surrogate pair */ | ||
184 | iString *chs = newUnicodeN_String(&ch, 1); | ||
185 | iBlock *u16 = toUtf16_String(chs); | ||
186 | delete_String(chs); | ||
187 | const uint16_t *ch16 = constData_Block(u16); | ||
188 | appendFormat_String(quot, "\\u%04x\\u%04x", ch16[0], ch16[1]); | ||
189 | } | ||
190 | else { | ||
191 | appendFormat_String(quot, "\\u%04x", ch); | ||
192 | } | ||
182 | } | 193 | } |
183 | else { | 194 | else { |
184 | appendChar_String(quot, ch); | 195 | appendChar_String(quot, ch); |
@@ -212,13 +223,27 @@ static iString *unquote_String_(const iString *d) { | |||
212 | else if (esc == 'u') { | 223 | else if (esc == 'u') { |
213 | char digits[5]; | 224 | char digits[5]; |
214 | iZap(digits); | 225 | iZap(digits); |
215 | iForIndices(j, digits) { | 226 | for (size_t j = 0; j < 4; j++) { |
216 | next_StringConstIterator(&i); | 227 | next_StringConstIterator(&i); |
217 | digits[j] = *i.pos; | 228 | digits[j] = *i.pos; |
218 | } | 229 | } |
219 | iChar codepoint = strtoul(digits, NULL, 16); | 230 | uint16_t ch16[2] = { strtoul(digits, NULL, 16), 0 }; |
220 | if (codepoint) { | 231 | if (ch16[0] < 0xD800 || ch16[0] >= 0xE000) { |
221 | appendChar_String(unquot, codepoint); | 232 | appendChar_String(unquot, ch16[0]); |
233 | } | ||
234 | else { | ||
235 | /* UTF-16 surrogate pair */ | ||
236 | next_StringConstIterator(&i); | ||
237 | next_StringConstIterator(&i); | ||
238 | iZap(digits); | ||
239 | for (size_t j = 0; j < 4; j++) { | ||
240 | next_StringConstIterator(&i); | ||
241 | digits[j] = *i.pos; | ||
242 | } | ||
243 | ch16[1] = strtoul(digits, NULL, 16); | ||
244 | iString *u16 = newUtf16N_String(ch16, 2); | ||
245 | append_String(unquot, u16); | ||
246 | delete_String(u16); | ||
222 | } | 247 | } |
223 | } | 248 | } |
224 | else { | 249 | else { |
@@ -243,6 +268,7 @@ void init_Translation(iTranslation *d, iDocumentWidget *doc) { | |||
243 | d->doc = doc; /* owner */ | 268 | d->doc = doc; /* owner */ |
244 | d->request = new_TlsRequest(); | 269 | d->request = new_TlsRequest(); |
245 | d->timer = 0; | 270 | d->timer = 0; |
271 | init_Array(&d->lineTypes, sizeof(int)); | ||
246 | setUserData_Object(d->request, d->doc); | 272 | setUserData_Object(d->request, d->doc); |
247 | setHost_TlsRequest(d->request, | 273 | setHost_TlsRequest(d->request, |
248 | collectNewCStr_String(translationServiceHost), | 274 | collectNewCStr_String(translationServiceHost), |
@@ -257,6 +283,7 @@ void deinit_Translation(iTranslation *d) { | |||
257 | cancel_TlsRequest(d->request); | 283 | cancel_TlsRequest(d->request); |
258 | iRelease(d->request); | 284 | iRelease(d->request); |
259 | destroy_Widget(d->dlg); | 285 | destroy_Widget(d->dlg); |
286 | deinit_Array(&d->lineTypes); | ||
260 | } | 287 | } |
261 | 288 | ||
262 | static uint32_t animate_Translation_(uint32_t interval, iAny *ptr) { | 289 | static uint32_t animate_Translation_(uint32_t interval, iAny *ptr) { |
@@ -270,13 +297,32 @@ void submit_Translation(iTranslation *d) { | |||
270 | const char *idTo = languageId_String(text_LabelWidget(findChild_Widget(d->dlg, "xlt.to"))); | 297 | const char *idTo = languageId_String(text_LabelWidget(findChild_Widget(d->dlg, "xlt.to"))); |
271 | iAssert(status_TlsRequest(d->request) != submitted_TlsRequestStatus); | 298 | iAssert(status_TlsRequest(d->request) != submitted_TlsRequestStatus); |
272 | iBlock *json = collect_Block(new_Block(0)); | 299 | iBlock *json = collect_Block(new_Block(0)); |
273 | iString *docSrc = collect_String(copy_String(source_GmDocument(document_DocumentWidget(d->doc)))); | 300 | iString *docSrc = collectNew_String(); |
274 | replace_String(docSrc, "=>", doubleArrowSymbol); | 301 | /* TODO: Strip all markup and remember it. These are reapplied when reading response. */ { |
275 | replace_String(docSrc, "```", tripleBacktickSymbol); | 302 | iRangecc line = iNullRange; |
276 | replace_String(docSrc, "###", h3Symbol); | 303 | while (nextSplit_Rangecc( |
277 | replace_String(docSrc, "##", h2Symbol); | 304 | range_String(source_GmDocument(document_DocumentWidget(d->doc))), "\n", &line)) { |
278 | replace_String(docSrc, "#", h1Symbol); | 305 | iRangecc cleanLine = trimmed_Rangecc(line); |
279 | replace_String(docSrc, "\n*", bulletSymbol); | 306 | const int lineType = lineType_Rangecc(cleanLine); |
307 | pushBack_Array(&d->lineTypes, &lineType); | ||
308 | if (lineType == link_GmLineType) { | ||
309 | cleanLine.start += 2; /* skip over the => */ | ||
310 | } | ||
311 | else { | ||
312 | trimLine_Rangecc(&cleanLine, lineType, iTrue); /* removes the prefix */ | ||
313 | } | ||
314 | if (!isEmpty_String(docSrc)) { | ||
315 | appendCStr_String(docSrc, "\n"); | ||
316 | } | ||
317 | appendRange_String(docSrc, cleanLine); | ||
318 | } | ||
319 | } | ||
320 | // replace_String(docSrc, "=>", doubleArrowSymbol); | ||
321 | // replace_String(docSrc, "```", tripleBacktickSymbol); | ||
322 | // replace_String(docSrc, "###", h3Symbol); | ||
323 | // replace_String(docSrc, "##", h2Symbol); | ||
324 | // replace_String(docSrc, "#", h1Symbol); | ||
325 | // replace_String(docSrc, "\n*", bulletSymbol); | ||
280 | printf_Block(json, | 326 | printf_Block(json, |
281 | "{\"q\":\"%s\",\"source\":\"%s\",\"target\":\"%s\"}", | 327 | "{\"q\":\"%s\",\"source\":\"%s\",\"target\":\"%s\"}", |
282 | cstrCollect_String(quote_String_(docSrc)), | 328 | cstrCollect_String(quote_String_(docSrc)), |
@@ -286,7 +332,7 @@ void submit_Translation(iTranslation *d) { | |||
286 | printf_Block(msg, "POST /translate HTTP/1.1\r\n" | 332 | printf_Block(msg, "POST /translate HTTP/1.1\r\n" |
287 | "Host: xlt.skyjake.fi\r\n" | 333 | "Host: xlt.skyjake.fi\r\n" |
288 | "Connection: close\r\n" | 334 | "Connection: close\r\n" |
289 | "Content-Type: application/json\r\n" | 335 | "Content-Type: application/json; charset=utf-8\r\n" |
290 | "Content-Length: %zu\r\n\r\n", size_Block(json)); | 336 | "Content-Length: %zu\r\n\r\n", size_Block(json)); |
291 | append_Block(msg, json); | 337 | append_Block(msg, json); |
292 | setContent_TlsRequest(d->request, msg); | 338 | setContent_TlsRequest(d->request, msg); |
@@ -310,20 +356,58 @@ static iBool processResult_Translation_(iTranslation *d) { | |||
310 | return iFalse; | 356 | return iFalse; |
311 | } | 357 | } |
312 | iBlock *resultData = collect_Block(readAll_TlsRequest(d->request)); | 358 | iBlock *resultData = collect_Block(readAll_TlsRequest(d->request)); |
313 | // printf("result(%zu):\n%s\n", size_Block(resultData), cstr_Block(resultData)); | 359 | printf("result(%zu):\n%s\n", size_Block(resultData), cstr_Block(resultData)); |
314 | // fflush(stdout); | 360 | fflush(stdout); |
315 | iRegExp *pattern = iClob(new_RegExp(".*translatedText\":\"(.*)\"\\}", caseSensitive_RegExpOption)); | 361 | iRegExp *pattern = iClob(new_RegExp(".*translatedText\":\"(.*)\"\\}", caseSensitive_RegExpOption)); |
316 | iRegExpMatch m; | 362 | iRegExpMatch m; |
317 | init_RegExpMatch(&m); | 363 | init_RegExpMatch(&m); |
318 | if (matchRange_RegExp(pattern, range_Block(resultData), &m)) { | 364 | if (matchRange_RegExp(pattern, range_Block(resultData), &m)) { |
319 | iString *translation = unquote_String_(collect_String(captured_RegExpMatch(&m, 1))); | 365 | iString *translation = unquote_String_(collect_String(captured_RegExpMatch(&m, 1))); |
320 | replace_String(translation, tripleBacktickSymbol, "```"); | 366 | iString *marked = collectNew_String(); |
321 | replace_String(translation, doubleArrowSymbol, "=>"); | 367 | iRangecc line; |
322 | replace_String(translation, h3Symbol, "### "); | 368 | size_t lineIndex = 0; |
323 | replace_String(translation, h2Symbol, "## "); | 369 | while (nextSplit_Rangecc(range_String(translation), "\n", &line)) { |
324 | replace_String(translation, h1Symbol, "# "); | 370 | iRangecc cleanLine = trimmed_Rangecc(line); |
325 | replace_String(translation, bulletSymbol, "\n* "); | 371 | if (!isEmpty_String(marked)) { |
326 | setSource_DocumentWidget(d->doc, translation); | 372 | appendCStr_String(marked, "\n"); |
373 | } | ||
374 | if (lineIndex < size_Array(&d->lineTypes)) { | ||
375 | switch (value_Array(&d->lineTypes, lineIndex, int)) { | ||
376 | case bullet_GmLineType: | ||
377 | appendCStr_String(marked, "* "); | ||
378 | break; | ||
379 | case link_GmLineType: | ||
380 | appendCStr_String(marked, "=> "); | ||
381 | break; | ||
382 | case quote_GmLineType: | ||
383 | appendCStr_String(marked, "> "); | ||
384 | break; | ||
385 | case preformatted_GmLineType: | ||
386 | appendCStr_String(marked, "```"); | ||
387 | break; | ||
388 | case heading1_GmLineType: | ||
389 | appendCStr_String(marked, "# "); | ||
390 | break; | ||
391 | case heading2_GmLineType: | ||
392 | appendCStr_String(marked, "## "); | ||
393 | break; | ||
394 | case heading3_GmLineType: | ||
395 | appendCStr_String(marked, "### "); | ||
396 | break; | ||
397 | default: | ||
398 | break; | ||
399 | } | ||
400 | } | ||
401 | appendRange_String(marked, cleanLine); | ||
402 | lineIndex++; | ||
403 | } | ||
404 | // replace_String(translation, tripleBacktickSymbol, "```"); | ||
405 | // replace_String(translation, doubleArrowSymbol, "=>"); | ||
406 | // replace_String(translation, h3Symbol, "### "); | ||
407 | // replace_String(translation, h2Symbol, "## "); | ||
408 | // replace_String(translation, h1Symbol, "# "); | ||
409 | // replace_String(translation, bulletSymbol, "\n* "); | ||
410 | setSource_DocumentWidget(d->doc, marked); | ||
327 | postCommand_App("sidebar.update"); | 411 | postCommand_App("sidebar.update"); |
328 | delete_String(translation); | 412 | delete_String(translation); |
329 | } | 413 | } |