summary | refs | log | tree | commit | diff
path: root/src/ui/translation.c
diff options
context:
space:
mode:
author: Jaakko Keränen <jaakko.keranen@iki.fi> 2021-03-16 12:35:49 +0200
committer: Jaakko Keränen <jaakko.keranen@iki.fi> 2021-03-16 12:35:49 +0200
commit: ce1ac1ba801d6f09ec028838d5fed863ffce6a9a (patch)
tree: 7e7e25f9480130f8f479efe5a0396c41eadb6a0f /src/ui/translation.c
parent: ee495092d59f5d5cda9491ebdaa7ef7f107341a3 (diff)
Translation: More reliable markup preservation
Diffstat (limited to 'src/ui/translation.c')
-rw-r--r-- src/ui/translation.c 144
1 files changed, 114 insertions, 30 deletions
diff --git a/src/ui/translation.c b/src/ui/translation.c
index 63d64d1b..e5059f38 100644
--- a/src/ui/translation.c
+++ b/src/ui/translation.c
@@ -111,7 +111,7 @@ static void draw_TranslationProgressWidget_(const iTranslationProgressWidget *d)
111 const iInt2 mid = mid_Rect(bounds); 111 const iInt2 mid = mid_Rect(bounds);
112 SDL_SetRenderDrawBlendMode(renderer_Window(get_Window()), SDL_BLENDMODE_BLEND); 112 SDL_SetRenderDrawBlendMode(renderer_Window(get_Window()), SDL_BLENDMODE_BLEND);
113 iConstForEach(Array, i, &d->sprites) { 113 iConstForEach(Array, i, &d->sprites) {
114 const int index = index_ArrayConstIterator(&i); 114 const int index = (int) index_ArrayConstIterator(&i);
115 const float angle = (float) index; 115 const float angle = (float) index;
116 const iSprite *spr = i.value; 116 const iSprite *spr = i.value;
117 const float opacity = iClamp(t - index * 0.5f, 0.0, 1.0f); 117 const float opacity = iClamp(t - index * 0.5f, 0.0, 1.0f);
@@ -151,12 +151,12 @@ iDefineTypeConstructionArgs(Translation, (iDocumentWidget *doc), doc)
151static const char * translationServiceHost = "xlt.skyjake.fi"; 151static const char * translationServiceHost = "xlt.skyjake.fi";
152static const uint16_t translationServicePort = 443; 152static const uint16_t translationServicePort = 443;
153 153
154static const char *doubleArrowSymbol = "\u20e2"; /* prevent getting mangled */ 154//static const char *doubleArrowSymbol = "\U0001f192"; //"\u20e2"; /* prevent getting mangled */
155static const char *tripleBacktickSymbol = "\u20e3"; 155//static const char *tripleBacktickSymbol = "\U0001f1a9"; //\u20e3";
156static const char *h1Symbol = "\u20e4"; 156//static const char *h1Symbol = "\U0001f19d";
157static const char *h2Symbol = "\u20e5"; 157//static const char *h2Symbol = "\U0001f19e";
158static const char *h3Symbol = "\u20e6"; 158//static const char *h3Symbol = "\U0001f19f";
159static const char *bulletSymbol = "\n\u20e7"; 159//static const char *bulletSymbol = "\n\U0001f196";
160 160
161static iString *quote_String_(const iString *d) { 161static iString *quote_String_(const iString *d) {
162 iString *quot = new_String(); 162 iString *quot = new_String();
@@ -177,8 +177,19 @@ static iString *quote_String_(const iString *d) {
177 else if (ch == '\t') { 177 else if (ch == '\t') {
178 appendCStr_String(quot, "\\t"); 178 appendCStr_String(quot, "\\t");
179 } 179 }
180 else if (ch >= 0x100) { 180 else if (ch >= 0x80) {
181 appendFormat_String(quot, "\\u%04x", ch); 181 if ((ch >= 0xD800 && ch < 0xE000) || ch >= 0x10000) {
182 /* TODO: Add a helper function? */
183 /* UTF-16 surrogate pair */
184 iString *chs = newUnicodeN_String(&ch, 1);
185 iBlock *u16 = toUtf16_String(chs);
186 delete_String(chs);
187 const uint16_t *ch16 = constData_Block(u16);
188 appendFormat_String(quot, "\\u%04x\\u%04x", ch16[0], ch16[1]);
189 }
190 else {
191 appendFormat_String(quot, "\\u%04x", ch);
192 }
182 } 193 }
183 else { 194 else {
184 appendChar_String(quot, ch); 195 appendChar_String(quot, ch);
@@ -212,13 +223,27 @@ static iString *unquote_String_(const iString *d) {
212 else if (esc == 'u') { 223 else if (esc == 'u') {
213 char digits[5]; 224 char digits[5];
214 iZap(digits); 225 iZap(digits);
215 iForIndices(j, digits) { 226 for (size_t j = 0; j < 4; j++) {
216 next_StringConstIterator(&i); 227 next_StringConstIterator(&i);
217 digits[j] = *i.pos; 228 digits[j] = *i.pos;
218 } 229 }
219 iChar codepoint = strtoul(digits, NULL, 16); 230 uint16_t ch16[2] = { strtoul(digits, NULL, 16), 0 };
220 if (codepoint) { 231 if (ch16[0] < 0xD800 || ch16[0] >= 0xE000) {
221 appendChar_String(unquot, codepoint); 232 appendChar_String(unquot, ch16[0]);
233 }
234 else {
235 /* UTF-16 surrogate pair */
236 next_StringConstIterator(&i);
237 next_StringConstIterator(&i);
238 iZap(digits);
239 for (size_t j = 0; j < 4; j++) {
240 next_StringConstIterator(&i);
241 digits[j] = *i.pos;
242 }
243 ch16[1] = strtoul(digits, NULL, 16);
244 iString *u16 = newUtf16N_String(ch16, 2);
245 append_String(unquot, u16);
246 delete_String(u16);
222 } 247 }
223 } 248 }
224 else { 249 else {
@@ -243,6 +268,7 @@ void init_Translation(iTranslation *d, iDocumentWidget *doc) {
243 d->doc = doc; /* owner */ 268 d->doc = doc; /* owner */
244 d->request = new_TlsRequest(); 269 d->request = new_TlsRequest();
245 d->timer = 0; 270 d->timer = 0;
271 init_Array(&d->lineTypes, sizeof(int));
246 setUserData_Object(d->request, d->doc); 272 setUserData_Object(d->request, d->doc);
247 setHost_TlsRequest(d->request, 273 setHost_TlsRequest(d->request,
248 collectNewCStr_String(translationServiceHost), 274 collectNewCStr_String(translationServiceHost),
@@ -257,6 +283,7 @@ void deinit_Translation(iTranslation *d) {
257 cancel_TlsRequest(d->request); 283 cancel_TlsRequest(d->request);
258 iRelease(d->request); 284 iRelease(d->request);
259 destroy_Widget(d->dlg); 285 destroy_Widget(d->dlg);
286 deinit_Array(&d->lineTypes);
260} 287}
261 288
262static uint32_t animate_Translation_(uint32_t interval, iAny *ptr) { 289static uint32_t animate_Translation_(uint32_t interval, iAny *ptr) {
@@ -270,13 +297,32 @@ void submit_Translation(iTranslation *d) {
270 const char *idTo = languageId_String(text_LabelWidget(findChild_Widget(d->dlg, "xlt.to"))); 297 const char *idTo = languageId_String(text_LabelWidget(findChild_Widget(d->dlg, "xlt.to")));
271 iAssert(status_TlsRequest(d->request) != submitted_TlsRequestStatus); 298 iAssert(status_TlsRequest(d->request) != submitted_TlsRequestStatus);
272 iBlock *json = collect_Block(new_Block(0)); 299 iBlock *json = collect_Block(new_Block(0));
273 iString *docSrc = collect_String(copy_String(source_GmDocument(document_DocumentWidget(d->doc)))); 300 iString *docSrc = collectNew_String();
274 replace_String(docSrc, "=>", doubleArrowSymbol); 301 /* TODO: Strip all markup and remember it. These are reapplied when reading response. */ {
275 replace_String(docSrc, "```", tripleBacktickSymbol); 302 iRangecc line = iNullRange;
276 replace_String(docSrc, "###", h3Symbol); 303 while (nextSplit_Rangecc(
277 replace_String(docSrc, "##", h2Symbol); 304 range_String(source_GmDocument(document_DocumentWidget(d->doc))), "\n", &line)) {
278 replace_String(docSrc, "#", h1Symbol); 305 iRangecc cleanLine = trimmed_Rangecc(line);
279 replace_String(docSrc, "\n*", bulletSymbol); 306 const int lineType = lineType_Rangecc(cleanLine);
307 pushBack_Array(&d->lineTypes, &lineType);
308 if (lineType == link_GmLineType) {
309 cleanLine.start += 2; /* skip over the => */
310 }
311 else {
312 trimLine_Rangecc(&cleanLine, lineType, iTrue); /* removes the prefix */
313 }
314 if (!isEmpty_String(docSrc)) {
315 appendCStr_String(docSrc, "\n");
316 }
317 appendRange_String(docSrc, cleanLine);
318 }
319 }
320// replace_String(docSrc, "=>", doubleArrowSymbol);
321// replace_String(docSrc, "```", tripleBacktickSymbol);
322// replace_String(docSrc, "###", h3Symbol);
323// replace_String(docSrc, "##", h2Symbol);
324// replace_String(docSrc, "#", h1Symbol);
325// replace_String(docSrc, "\n*", bulletSymbol);
280 printf_Block(json, 326 printf_Block(json,
281 "{\"q\":\"%s\",\"source\":\"%s\",\"target\":\"%s\"}", 327 "{\"q\":\"%s\",\"source\":\"%s\",\"target\":\"%s\"}",
282 cstrCollect_String(quote_String_(docSrc)), 328 cstrCollect_String(quote_String_(docSrc)),
@@ -286,7 +332,7 @@ void submit_Translation(iTranslation *d) {
286 printf_Block(msg, "POST /translate HTTP/1.1\r\n" 332 printf_Block(msg, "POST /translate HTTP/1.1\r\n"
287 "Host: xlt.skyjake.fi\r\n" 333 "Host: xlt.skyjake.fi\r\n"
288 "Connection: close\r\n" 334 "Connection: close\r\n"
289 "Content-Type: application/json\r\n" 335 "Content-Type: application/json; charset=utf-8\r\n"
290 "Content-Length: %zu\r\n\r\n", size_Block(json)); 336 "Content-Length: %zu\r\n\r\n", size_Block(json));
291 append_Block(msg, json); 337 append_Block(msg, json);
292 setContent_TlsRequest(d->request, msg); 338 setContent_TlsRequest(d->request, msg);
@@ -310,20 +356,58 @@ static iBool processResult_Translation_(iTranslation *d) {
310 return iFalse; 356 return iFalse;
311 } 357 }
312 iBlock *resultData = collect_Block(readAll_TlsRequest(d->request)); 358 iBlock *resultData = collect_Block(readAll_TlsRequest(d->request));
313// printf("result(%zu):\n%s\n", size_Block(resultData), cstr_Block(resultData)); 359 printf("result(%zu):\n%s\n", size_Block(resultData), cstr_Block(resultData));
314// fflush(stdout); 360 fflush(stdout);
315 iRegExp *pattern = iClob(new_RegExp(".*translatedText\":\"(.*)\"\\}", caseSensitive_RegExpOption)); 361 iRegExp *pattern = iClob(new_RegExp(".*translatedText\":\"(.*)\"\\}", caseSensitive_RegExpOption));
316 iRegExpMatch m; 362 iRegExpMatch m;
317 init_RegExpMatch(&m); 363 init_RegExpMatch(&m);
318 if (matchRange_RegExp(pattern, range_Block(resultData), &m)) { 364 if (matchRange_RegExp(pattern, range_Block(resultData), &m)) {
319 iString *translation = unquote_String_(collect_String(captured_RegExpMatch(&m, 1))); 365 iString *translation = unquote_String_(collect_String(captured_RegExpMatch(&m, 1)));
320 replace_String(translation, tripleBacktickSymbol, "```"); 366 iString *marked = collectNew_String();
321 replace_String(translation, doubleArrowSymbol, "=>"); 367 iRangecc line;
322 replace_String(translation, h3Symbol, "### "); 368 size_t lineIndex = 0;
323 replace_String(translation, h2Symbol, "## "); 369 while (nextSplit_Rangecc(range_String(translation), "\n", &line)) {
324 replace_String(translation, h1Symbol, "# "); 370 iRangecc cleanLine = trimmed_Rangecc(line);
325 replace_String(translation, bulletSymbol, "\n* "); 371 if (!isEmpty_String(marked)) {
326 setSource_DocumentWidget(d->doc, translation); 372 appendCStr_String(marked, "\n");
373 }
374 if (lineIndex < size_Array(&d->lineTypes)) {
375 switch (value_Array(&d->lineTypes, lineIndex, int)) {
376 case bullet_GmLineType:
377 appendCStr_String(marked, "* ");
378 break;
379 case link_GmLineType:
380 appendCStr_String(marked, "=> ");
381 break;
382 case quote_GmLineType:
383 appendCStr_String(marked, "> ");
384 break;
385 case preformatted_GmLineType:
386 appendCStr_String(marked, "```");
387 break;
388 case heading1_GmLineType:
389 appendCStr_String(marked, "# ");
390 break;
391 case heading2_GmLineType:
392 appendCStr_String(marked, "## ");
393 break;
394 case heading3_GmLineType:
395 appendCStr_String(marked, "### ");
396 break;
397 default:
398 break;
399 }
400 }
401 appendRange_String(marked, cleanLine);
402 lineIndex++;
403 }
404// replace_String(translation, tripleBacktickSymbol, "```");
405// replace_String(translation, doubleArrowSymbol, "=>");
406// replace_String(translation, h3Symbol, "### ");
407// replace_String(translation, h2Symbol, "## ");
408// replace_String(translation, h1Symbol, "# ");
409// replace_String(translation, bulletSymbol, "\n* ");
410 setSource_DocumentWidget(d->doc, marked);
327 postCommand_App("sidebar.update"); 411 postCommand_App("sidebar.update");
328 delete_String(translation); 412 delete_String(translation);
329 } 413 }