diff options
author | Jaakko Keränen <jaakko.keranen@iki.fi> | 2021-07-14 18:46:03 +0300 |
---|---|---|
committer | Jaakko Keränen <jaakko.keranen@iki.fi> | 2021-07-14 18:46:03 +0300 |
commit | 559927aa198bf062e32b5fe55431dfb7942d40f2 (patch) | |
tree | 1932831f777734dc4c292040bbbca13dd8f9af1a /src | |
parent | efb12e24a2cdf311c8aeee9627da89398c3aa132 (diff) |
Text: Detect Arabic script
It appears HarfBuzz's script autodetection isn't working here for Arabic.
Diffstat (limited to 'src')
-rw-r--r-- | src/ui/text.c | 91 | ||||
-rw-r--r-- | src/ui/text.h | 2 |
2 files changed, 50 insertions, 43 deletions
diff --git a/src/ui/text.c b/src/ui/text.c index 4b82191f..01a9d606 100644 --- a/src/ui/text.c +++ b/src/ui/text.c | |||
@@ -163,8 +163,9 @@ static void init_Font(iFont *d, const iBlock *data, int height, float scale, | |||
163 | data == &fontNunitoExtraLight_Embedded) { | 163 | data == &fontNunitoExtraLight_Embedded) { |
164 | d->family = nunito_TextFont; | 164 | d->family = nunito_TextFont; |
165 | } | 165 | } |
166 | else if (data == &fontNotoSansArabicUIRegular_Embedded) { | 166 | else if (//data == &fontScheherazadeNewRegular_Embedded) { |
167 | d->family = notoSansArabic_TextFont; | 167 | data == &fontNotoSansArabicUIRegular_Embedded) { |
168 | d->family = arabic_TextFont; | ||
168 | } | 169 | } |
169 | else if (data == &fontNotoSansSymbolsRegular_Embedded || | 170 | else if (data == &fontNotoSansSymbolsRegular_Embedded || |
170 | data == &fontNotoSansSymbols2Regular_Embedded || | 171 | data == &fontNotoSansSymbols2Regular_Embedded || |
@@ -421,6 +422,7 @@ static void initFonts_Text_(iText *d) { | |||
421 | DEFINE_FONT_SET(&fontNotoSansSCRegular_Embedded, 1.0f), | 422 | DEFINE_FONT_SET(&fontNotoSansSCRegular_Embedded, 1.0f), |
422 | DEFINE_FONT_SET(&fontNanumGothicRegular_Embedded, 1.0f), /* TODO: should use Noto Sans here, too */ | 423 | DEFINE_FONT_SET(&fontNanumGothicRegular_Embedded, 1.0f), /* TODO: should use Noto Sans here, too */ |
423 | DEFINE_FONT_SET(&fontNotoSansArabicUIRegular_Embedded, 1.0f), | 424 | DEFINE_FONT_SET(&fontNotoSansArabicUIRegular_Embedded, 1.0f), |
425 | // DEFINE_FONT_SET(&fontScheherazadeNewRegular_Embedded, 1.0f), | ||
424 | }; | 426 | }; |
425 | iForIndices(i, fontData) { | 427 | iForIndices(i, fontData) { |
426 | iFont *font = &d->fonts[i]; | 428 | iFont *font = &d->fonts[i]; |
@@ -787,6 +789,7 @@ struct Impl_AttributedRun { | |||
787 | struct { | 789 | struct { |
788 | uint8_t isLineBreak : 1; | 790 | uint8_t isLineBreak : 1; |
789 | uint8_t isRTL : 1; | 791 | uint8_t isRTL : 1; |
792 | uint8_t isArabic : 1; /* Arabic script detected */ | ||
790 | } flags; | 793 | } flags; |
791 | }; | 794 | }; |
792 | 795 | ||
@@ -803,6 +806,7 @@ struct Impl_AttributedText { | |||
803 | iArray logical; /* UTF-32 text in logical order (mixed directions; matches source) */ | 806 | iArray logical; /* UTF-32 text in logical order (mixed directions; matches source) */ |
804 | iArray visual; /* UTF-32 text in visual order (LTR) */ | 807 | iArray visual; /* UTF-32 text in visual order (LTR) */ |
805 | iArray logicalToVisual; /* map visual index to logical index */ | 808 | iArray logicalToVisual; /* map visual index to logical index */ |
809 | iArray visualToLogical; | ||
806 | iArray logicalToSourceOffset; /* map logical character to an UTF-8 offset in the source text */ | 810 | iArray logicalToSourceOffset; /* map logical character to an UTF-8 offset in the source text */ |
807 | char * bidiLevels; | 811 | char * bidiLevels; |
808 | iBool isBaseRTL; | 812 | iBool isBaseRTL; |
@@ -846,6 +850,7 @@ static void finishRun_AttributedText_(iAttributedText *d, iAttributedRun *run, i | |||
846 | if (!isEmpty_Range(&finishedRun.logical)) { | 850 | if (!isEmpty_Range(&finishedRun.logical)) { |
847 | pushBack_Array(&d->runs, &finishedRun); | 851 | pushBack_Array(&d->runs, &finishedRun); |
848 | run->flags.isLineBreak = iFalse; | 852 | run->flags.isLineBreak = iFalse; |
853 | run->flags.isArabic = iFalse; | ||
849 | } | 854 | } |
850 | run->logical.start = endAt; | 855 | run->logical.start = endAt; |
851 | } | 856 | } |
@@ -885,14 +890,15 @@ static void prepare_AttributedText_(iAttributedText *d, int overrideBaseDir) { | |||
885 | /* Use FriBidi to reorder the codepoints. */ | 890 | /* Use FriBidi to reorder the codepoints. */ |
886 | resize_Array(&d->visual, length); | 891 | resize_Array(&d->visual, length); |
887 | resize_Array(&d->logicalToVisual, length); | 892 | resize_Array(&d->logicalToVisual, length); |
888 | d->bidiLevels = malloc(length); | 893 | resize_Array(&d->visualToLogical, length); |
894 | d->bidiLevels = length ? malloc(length) : NULL; | ||
889 | FriBidiParType baseDir = (FriBidiParType) FRIBIDI_TYPE_ON; | 895 | FriBidiParType baseDir = (FriBidiParType) FRIBIDI_TYPE_ON; |
890 | fribidi_log2vis(constData_Array(&d->logical), | 896 | fribidi_log2vis(constData_Array(&d->logical), |
891 | length, | 897 | length, |
892 | &baseDir, | 898 | &baseDir, |
893 | data_Array(&d->visual), | 899 | data_Array(&d->visual), |
894 | data_Array(&d->logicalToVisual), | 900 | data_Array(&d->logicalToVisual), |
895 | NULL, | 901 | data_Array(&d->visualToLogical), |
896 | (FriBidiLevel *) d->bidiLevels); | 902 | (FriBidiLevel *) d->bidiLevels); |
897 | d->isBaseRTL = (overrideBaseDir == 0 ? FRIBIDI_IS_RTL(baseDir) : (overrideBaseDir < 0)); | 903 | d->isBaseRTL = (overrideBaseDir == 0 ? FRIBIDI_IS_RTL(baseDir) : (overrideBaseDir < 0)); |
898 | #else | 904 | #else |
@@ -902,11 +908,14 @@ static void prepare_AttributedText_(iAttributedText *d, int overrideBaseDir) { | |||
902 | for (size_t i = 0; i < length; i++) { | 908 | for (size_t i = 0; i < length; i++) { |
903 | set_Array(&d->logicalToVisual, i, &(int){ i }); | 909 | set_Array(&d->logicalToVisual, i, &(int){ i }); |
904 | } | 910 | } |
911 | setCopy_Array(&d->visualToLogical, &d->logicalToVisual); | ||
905 | d->isBaseRTL = iFalse; | 912 | d->isBaseRTL = iFalse; |
906 | #endif | 913 | #endif |
907 | } | 914 | } |
908 | /* The mapping needs to include the terminating NULL position. */ { | 915 | /* The mapping needs to include the terminating NULL position. */ { |
909 | pushBack_Array(&d->logicalToSourceOffset, &(int){ d->source.end - d->source.start }); | 916 | pushBack_Array(&d->logicalToSourceOffset, &(int){ d->source.end - d->source.start }); |
917 | pushBack_Array(&d->logicalToVisual, &(int){ length }); | ||
918 | pushBack_Array(&d->visualToLogical, &(int){ length }); | ||
910 | } | 919 | } |
911 | size_t avail = d->maxLen; | 920 | size_t avail = d->maxLen; |
912 | iAttributedRun run = { .logical = { 0, length }, | 921 | iAttributedRun run = { .logical = { 0, length }, |
@@ -918,18 +927,19 @@ static void prepare_AttributedText_(iAttributedText *d, int overrideBaseDir) { | |||
918 | iBool isRTL = d->isBaseRTL; | 927 | iBool isRTL = d->isBaseRTL; |
919 | int numNonSpace = 0; | 928 | int numNonSpace = 0; |
920 | for (int pos = 0; pos < length; pos++) { | 929 | for (int pos = 0; pos < length; pos++) { |
921 | const iChar ch = logicalText[pos]; | 930 | const iChar ch = logicalText[pos]; |
922 | const int visPos = logToVis[pos]; | ||
923 | #if defined (LAGRANGE_ENABLE_FRIBIDI) | 931 | #if defined (LAGRANGE_ENABLE_FRIBIDI) |
924 | const char lev = d->bidiLevels[pos]; | 932 | if (d->bidiLevels) { |
925 | const iBool isNeutral = FRIBIDI_IS_NEUTRAL(lev); | 933 | const char lev = d->bidiLevels[pos]; |
926 | if (d->bidiLevels && !isNeutral) { | 934 | const iBool isNeutral = FRIBIDI_IS_NEUTRAL(lev); |
927 | iBool rtl = FRIBIDI_IS_RTL(lev) != 0; | 935 | if (!isNeutral) { |
928 | if (rtl != isRTL) { | 936 | iBool rtl = FRIBIDI_IS_RTL(lev) != 0; |
929 | /* Direction changes; must end the current run. */ | 937 | if (rtl != isRTL) { |
930 | // printf("dir change at %zu: %lc U+%04X\n", pos, ch, ch); | 938 | /* Direction changes; must end the current run. */ |
931 | finishRun_AttributedText_(d, &run, pos); | 939 | // printf("dir change at %zu: %lc U+%04X\n", pos, ch, ch); |
932 | isRTL = rtl; | 940 | finishRun_AttributedText_(d, &run, pos); |
941 | isRTL = rtl; | ||
942 | } | ||
933 | } | 943 | } |
934 | } | 944 | } |
935 | #else | 945 | #else |
@@ -993,7 +1003,7 @@ static void prepare_AttributedText_(iAttributedText *d, int overrideBaseDir) { | |||
993 | continue; | 1003 | continue; |
994 | } | 1004 | } |
995 | iFont *currentFont = d->font; | 1005 | iFont *currentFont = d->font; |
996 | if (run.font->family == notoSansArabic_TextFont && isPunct_Char(ch)) { | 1006 | if (run.font->family == arabic_TextFont && isPunct_Char(ch)) { |
997 | currentFont = run.font; /* remain as Arabic for whitespace */ | 1007 | currentFont = run.font; /* remain as Arabic for whitespace */ |
998 | } | 1008 | } |
999 | const iGlyph *glyph = glyph_Font_(currentFont, ch); | 1009 | const iGlyph *glyph = glyph_Font_(currentFont, ch); |
@@ -1006,6 +1016,11 @@ static void prepare_AttributedText_(iAttributedText *d, int overrideBaseDir) { | |||
1006 | (int)logicalText[pos]); | 1016 | (int)logicalText[pos]); |
1007 | #endif | 1017 | #endif |
1008 | } | 1018 | } |
1019 | #if defined (LAGRANGE_ENABLE_FRIBIDI) | ||
1020 | if (fribidi_get_bidi_type(ch) == FRIBIDI_TYPE_AL) { | ||
1021 | run.flags.isArabic = iTrue; /* Arabic letter */ | ||
1022 | } | ||
1023 | #endif | ||
1009 | } | 1024 | } |
1010 | if (!isEmpty_Range(&run.logical)) { | 1025 | if (!isEmpty_Range(&run.logical)) { |
1011 | pushBack_Array(&d->runs, &run); | 1026 | pushBack_Array(&d->runs, &run); |
@@ -1014,9 +1029,10 @@ static void prepare_AttributedText_(iAttributedText *d, int overrideBaseDir) { | |||
1014 | printf("[AttributedText] %zu runs:\n", size_Array(&d->runs)); | 1029 | printf("[AttributedText] %zu runs:\n", size_Array(&d->runs)); |
1015 | iConstForEach(Array, i, &d->runs) { | 1030 | iConstForEach(Array, i, &d->runs) { |
1016 | const iAttributedRun *run = i.value; | 1031 | const iAttributedRun *run = i.value; |
1017 | printf(" %zu %s %d...%d {%s}\n", index_ArrayConstIterator(&i), | 1032 | printf(" %zu %s log:%d...%d vis:%d...%d {%s}\n", index_ArrayConstIterator(&i), |
1018 | run->flags.isRTL ? "<-" : "->", | 1033 | run->flags.isRTL ? "<-" : "->", |
1019 | run->logical.start, run->logical.end, | 1034 | run->logical.start, run->logical.end - 1, |
1035 | logToVis[run->logical.start], logToVis[run->logical.end - 1], | ||
1020 | cstr_Rangecc(sourceRange_AttributedText_(d, run->logical))); | 1036 | cstr_Rangecc(sourceRange_AttributedText_(d, run->logical))); |
1021 | } | 1037 | } |
1022 | #endif | 1038 | #endif |
@@ -1032,6 +1048,7 @@ void init_AttributedText(iAttributedText *d, iRangecc text, size_t maxLen, iFont | |||
1032 | init_Array(&d->logical, sizeof(iChar)); | 1048 | init_Array(&d->logical, sizeof(iChar)); |
1033 | init_Array(&d->visual, sizeof(iChar)); | 1049 | init_Array(&d->visual, sizeof(iChar)); |
1034 | init_Array(&d->logicalToVisual, sizeof(int)); | 1050 | init_Array(&d->logicalToVisual, sizeof(int)); |
1051 | init_Array(&d->visualToLogical, sizeof(int)); | ||
1035 | init_Array(&d->logicalToSourceOffset, sizeof(int)); | 1052 | init_Array(&d->logicalToSourceOffset, sizeof(int)); |
1036 | d->bidiLevels = NULL; | 1053 | d->bidiLevels = NULL; |
1037 | d->isBaseRTL = iFalse; | 1054 | d->isBaseRTL = iFalse; |
@@ -1042,6 +1059,7 @@ void deinit_AttributedText(iAttributedText *d) { | |||
1042 | free(d->bidiLevels); | 1059 | free(d->bidiLevels); |
1043 | deinit_Array(&d->logicalToSourceOffset); | 1060 | deinit_Array(&d->logicalToSourceOffset); |
1044 | deinit_Array(&d->logicalToVisual); | 1061 | deinit_Array(&d->logicalToVisual); |
1062 | deinit_Array(&d->visualToLogical); | ||
1045 | deinit_Array(&d->visual); | 1063 | deinit_Array(&d->visual); |
1046 | deinit_Array(&d->logical); | 1064 | deinit_Array(&d->logical); |
1047 | deinit_Array(&d->runs); | 1065 | deinit_Array(&d->runs); |
@@ -1355,18 +1373,6 @@ static void evenMonospaceAdvances_GlyphBuffer_(iGlyphBuffer *d, iFont *baseFont) | |||
1355 | } | 1373 | } |
1356 | } | 1374 | } |
1357 | 1375 | ||
1358 | iLocalDef iChar flipBracket_(iChar c) { | ||
1359 | if (c == '(') return ')'; | ||
1360 | if (c == ')') return '('; | ||
1361 | if (c == '{') return '}'; | ||
1362 | if (c == '}') return '{'; | ||
1363 | if (c == '[') return ']'; | ||
1364 | if (c == ']') return '['; | ||
1365 | if (c == '<') return '>'; | ||
1366 | if (c == '>') return '<'; | ||
1367 | return c; | ||
1368 | } | ||
1369 | |||
1370 | static iRect run_Font_(iFont *d, const iRunArgs *args) { | 1376 | static iRect run_Font_(iFont *d, const iRunArgs *args) { |
1371 | const int mode = args->mode; | 1377 | const int mode = args->mode; |
1372 | const iInt2 orig = args->pos; | 1378 | const iInt2 orig = args->pos; |
@@ -1394,6 +1400,7 @@ static iRect run_Font_(iFont *d, const iRunArgs *args) { | |||
1394 | const iChar *logicalText = constData_Array(&attrText.logical); | 1400 | const iChar *logicalText = constData_Array(&attrText.logical); |
1395 | const iChar *visualText = constData_Array(&attrText.visual); | 1401 | const iChar *visualText = constData_Array(&attrText.visual); |
1396 | const int * logToVis = constData_Array(&attrText.logicalToVisual); | 1402 | const int * logToVis = constData_Array(&attrText.logicalToVisual); |
1403 | const int * visToLog = constData_Array(&attrText.visualToLogical); | ||
1397 | const size_t runCount = size_Array(&attrText.runs); | 1404 | const size_t runCount = size_Array(&attrText.runs); |
1398 | iArray buffers; | 1405 | iArray buffers; |
1399 | init_Array(&buffers, sizeof(iGlyphBuffer)); | 1406 | init_Array(&buffers, sizeof(iGlyphBuffer)); |
@@ -1403,20 +1410,20 @@ static iRect run_Font_(iFont *d, const iRunArgs *args) { | |||
1403 | const iAttributedRun *run = i.value; | 1410 | const iAttributedRun *run = i.value; |
1404 | iGlyphBuffer *buf = at_Array(&buffers, index_ArrayConstIterator(&i)); | 1411 | iGlyphBuffer *buf = at_Array(&buffers, index_ArrayConstIterator(&i)); |
1405 | init_GlyphBuffer_(buf, run->font, logicalText); | 1412 | init_GlyphBuffer_(buf, run->font, logicalText); |
1406 | for (int pos = run->logical.start; pos < run->logical.end; pos++) { | 1413 | /* Insert the text in visual order (LTR) in the HarfBuzz buffer for shaping. |
1407 | const int visPos = logToVis[pos]; | 1414 | First we need to map the logical run to the corresponding visual run. */ |
1408 | iChar ch = visualText[visPos]; | 1415 | int v[2] = { logToVis[run->logical.start], logToVis[run->logical.end - 1] }; |
1409 | if (run->flags.isRTL) { | 1416 | if (v[0] > v[1]) { |
1410 | /* Something odd with brackets... My guess is that because the font is not | 1417 | iSwap(int, v[0], v[1]); /* always LTR */ |
1411 | RTL (Noto Sans Arabic seems to lack brackets), they are not flipped | 1418 | } |
1412 | as expected. */ | 1419 | for (int vis = v[0]; vis <= v[1]; vis++) { |
1413 | ch = flipBracket_(ch); | 1420 | hb_buffer_add(buf->hb, visualText[vis], visToLog[vis]); |
1414 | } | ||
1415 | hb_buffer_add(buf->hb, ch, pos); | ||
1416 | } | 1421 | } |
1417 | hb_buffer_set_content_type(buf->hb, HB_BUFFER_CONTENT_TYPE_UNICODE); | 1422 | hb_buffer_set_content_type(buf->hb, HB_BUFFER_CONTENT_TYPE_UNICODE); |
1418 | hb_buffer_set_direction(buf->hb, run->flags.isRTL ? HB_DIRECTION_RTL : HB_DIRECTION_LTR); | 1423 | hb_buffer_set_direction(buf->hb, HB_DIRECTION_LTR); /* visual */ |
1419 | /* hb_buffer_set_script(hbBuf, HB_SCRIPT_LATIN); */ /* will be autodetected */ | 1424 | if (run->flags.isArabic) { |
1425 | hb_buffer_set_script(buf->hb, HB_SCRIPT_ARABIC); | ||
1426 | } | ||
1420 | } | 1427 | } |
1421 | if (isMonospaced) { | 1428 | if (isMonospaced) { |
1422 | /* Fit borrowed glyphs into the expected monospacing. */ | 1429 | /* Fit borrowed glyphs into the expected monospacing. */ |
diff --git a/src/ui/text.h b/src/ui/text.h index 4630b9f6..d3a9f844 100644 --- a/src/ui/text.h +++ b/src/ui/text.h | |||
@@ -133,7 +133,7 @@ enum iTextFont { | |||
133 | sourceSans3_TextFont, | 133 | sourceSans3_TextFont, |
134 | iosevka_TextFont, | 134 | iosevka_TextFont, |
135 | /* families: */ | 135 | /* families: */ |
136 | notoSansArabic_TextFont, | 136 | arabic_TextFont, |
137 | emojiAndSymbols_TextFont, | 137 | emojiAndSymbols_TextFont, |
138 | }; | 138 | }; |
139 | 139 | ||