diff options
author | Jaakko Keränen <jaakko.keranen@iki.fi> | 2021-10-12 12:59:12 +0300 |
---|---|---|
committer | Jaakko Keränen <jaakko.keranen@iki.fi> | 2021-10-12 12:59:12 +0300 |
commit | e884330ef73b2f557486a898a67a716f29887170 (patch) | |
tree | 479eb34232be69b6cf73878775c5aef754cf3797 | |
parent | c490862b7fb04550364ce98f98b9e4d1fcaa13b8 (diff) |
Experimental Markdown rendering
Convert Markdown to Gemtext and use ANSI escape sequences to switch fonts (bold, italic, monospace). The conversion is still a bit buggy...
-rw-r--r-- | src/defs.h | 1 | ||||
-rw-r--r-- | src/gmdocument.c | 179 | ||||
-rw-r--r-- | src/gmutil.c | 6 | ||||
-rw-r--r-- | src/ui/documentwidget.c | 4 | ||||
-rw-r--r-- | src/ui/text.c | 37 |
5 files changed, 224 insertions, 3 deletions
@@ -28,6 +28,7 @@ enum iSourceFormat { | |||
28 | undefined_SourceFormat = -1, | 28 | undefined_SourceFormat = -1, |
29 | gemini_SourceFormat = 0, | 29 | gemini_SourceFormat = 0, |
30 | plainText_SourceFormat, | 30 | plainText_SourceFormat, |
31 | markdown_SourceFormat, | ||
31 | }; | 32 | }; |
32 | 33 | ||
33 | enum iFileVersion { | 34 | enum iFileVersion { |
diff --git a/src/gmdocument.c b/src/gmdocument.c index 508047a6..f0d9bf08 100644 --- a/src/gmdocument.c +++ b/src/gmdocument.c | |||
@@ -1712,8 +1712,183 @@ void setUrl_GmDocument(iGmDocument *d, const iString *url) { | |||
1712 | updateIconBasedOnUrl_GmDocument_(d); | 1712 | updateIconBasedOnUrl_GmDocument_(d); |
1713 | } | 1713 | } |
1714 | 1714 | ||
1715 | static int replaceRegExp_String(iString *d, const iRegExp *regexp, const char *replacement, | ||
1716 | void (*matchHandler)(void *, const iRegExpMatch *), | ||
1717 | void *context) { | ||
1718 | iRegExpMatch m; | ||
1719 | iString result; | ||
1720 | int numMatches = 0; | ||
1721 | const char *pos = constBegin_String(d); | ||
1722 | init_RegExpMatch(&m); | ||
1723 | init_String(&result); | ||
1724 | while (matchString_RegExp(regexp, d, &m)) { | ||
1725 | appendRange_String(&result, (iRangecc){ pos, begin_RegExpMatch(&m) }); | ||
1726 | /* Replace any capture group back-references. */ | ||
1727 | for (const char *ch = replacement; *ch; ch++) { | ||
1728 | if (*ch == '\\') { | ||
1729 | ch++; | ||
1730 | if (*ch == '\\') { | ||
1731 | appendCStr_String(&result, "\\"); | ||
1732 | } | ||
1733 | else if (*ch >= '0' && *ch <= '9') { | ||
1734 | appendRange_String(&result, capturedRange_RegExpMatch(&m, *ch - '0')); | ||
1735 | } | ||
1736 | } | ||
1737 | else { | ||
1738 | appendData_Block(&result.chars, ch, 1); | ||
1739 | } | ||
1740 | } | ||
1741 | if (matchHandler) { | ||
1742 | matchHandler(context, &m); | ||
1743 | } | ||
1744 | pos = end_RegExpMatch(&m); | ||
1745 | numMatches++; | ||
1746 | } | ||
1747 | appendRange_String(&result, (iRangecc){ pos, constEnd_String(d) }); | ||
1748 | set_String(d, &result); | ||
1749 | deinit_String(&result); | ||
1750 | return numMatches; | ||
1751 | } | ||
1752 | |||
1753 | iDeclareType(PendingLink) /* A link lifted out of Markdown body text, waiting to be written out as a "=>" line. */ | ||
1754 | struct Impl_PendingLink { | ||
1755 | iString *url; /* Link target. A leading "[]" means the rest is the name of a reference-style link that must still be resolved. Deleted when the link is flushed. */ | ||
1756 | iString *title; /* Visible link text. Deleted when the link is flushed. */ | ||
1757 | }; | ||
1758 | |||
1759 | static void addPendingLink_(void *context, const iRegExpMatch *m) { | ||
1760 | pushBack_Array(context, &(iPendingLink){ | ||
1761 | .url = captured_RegExpMatch(m, 2), | ||
1762 | .title = captured_RegExpMatch(m, 1) | ||
1763 | }); | ||
1764 | } | ||
1765 | |||
1766 | static void addPendingNamedLink_(void *context, const iRegExpMatch *m) { | ||
1767 | pushBack_Array(context, &(iPendingLink){ | ||
1768 | .url = newFormat_String("[]%s", cstr_Rangecc(capturedRange_RegExpMatch(m, 2))), | ||
1769 | .title = captured_RegExpMatch(m, 1) | ||
1770 | }); | ||
1771 | } | ||
1772 | |||
1773 | static void flushPendingLinks_(iArray *links, const iString *source, iString *out) { | ||
1774 | iRegExp *namePattern = new_RegExp("\n\\s*\\[(.+?)\\]\\s*:\\s*([^\n]+)", 0); | ||
1775 | if (!endsWith_String(out, "\n")) { | ||
1776 | appendCStr_String(out, "\n"); | ||
1777 | } | ||
1778 | iForEach(Array, i, links) { | ||
1779 | iPendingLink *pending = i.value; | ||
1780 | const char *url = cstr_String(pending->url); | ||
1781 | if (startsWith_CStr(url, "[]")) { | ||
1782 | /* Find the matching named link. */ | ||
1783 | iRegExpMatch m; | ||
1784 | init_RegExpMatch(&m); | ||
1785 | while (matchString_RegExp(namePattern, source, &m)) { | ||
1786 | if (equal_Rangecc(capturedRange_RegExpMatch(&m, 1), url + 2)) { | ||
1787 | url = cstrCollect_String(captured_RegExpMatch(&m, 2)); | ||
1788 | break; | ||
1789 | } | ||
1790 | } | ||
1791 | } | ||
1792 | appendFormat_String(out, "\n=> %s %s", url, cstr_String(pending->title)); | ||
1793 | delete_String(pending->url); | ||
1794 | delete_String(pending->title); | ||
1795 | } | ||
1796 | clear_Array(links); | ||
1797 | iRelease(namePattern); | ||
1798 | } | ||
1799 | |||
1800 | static void convertMarkdownToGemtext_GmDocument_(iGmDocument *d) { | ||
1801 | iAssert(d->format == markdown_SourceFormat); | ||
1802 | /* Get rid of indented preformats. */ { | ||
1803 | iArray *pendingLinks = collectNew_Array(sizeof(iPendingLink)); | ||
1804 | const iRegExp *imageLinkPattern = iClob(new_RegExp("\n?!\\[(.+)\\]\\(([^)]+)\\)\n?", 0)); | ||
1805 | const iRegExp *linkPattern = iClob(new_RegExp("\\[(.+?)\\]\\(([^)]+)\\)", 0)); | ||
1806 | const iRegExp *namedLinkPattern = iClob(new_RegExp("\\[(.+?)\\]\\[(.+?)\\]", 0)); | ||
1807 | const iRegExp *namePattern = iClob(new_RegExp("\\s*\\[(.+?)\\]\\s*:\\s*([^\n]+)", 0)); | ||
1808 | iString result; | ||
1809 | init_String(&result); | ||
1810 | iRangecc line = iNullRange; | ||
1811 | iBool isPre = iFalse; | ||
1812 | iBool isLastEmpty = iFalse; | ||
1813 | while (nextSplit_Rangecc(range_String(&d->source), "\n", &line)) { | ||
1814 | if (!isPre) { | ||
1815 | if (*line.start == '#') { | ||
1816 | flushPendingLinks_(pendingLinks, &d->source, &result); | ||
1817 | } | ||
1818 | if (isEmpty_Range(&line)) { | ||
1819 | isLastEmpty = iTrue; | ||
1820 | continue; | ||
1821 | } | ||
1822 | if (isLastEmpty) { | ||
1823 | appendCStr_String(&result, "\n\n"); | ||
1824 | } | ||
1825 | else if (size_Range(&line) >= 2 && isnumber(line.start[0]) && | ||
1826 | (line.start[1] == '.' || | ||
1827 | (isnumber(line.start[1]) && line.start[2] == '.'))) { | ||
1828 | appendCStr_String(&result, "\n\n"); | ||
1829 | } | ||
1830 | else if (*line.start == '*' || *line.start == '>' || *line.start == '#') { | ||
1831 | appendCStr_String(&result, "\n"); | ||
1832 | } | ||
1833 | else { | ||
1834 | appendCStr_String(&result, " "); | ||
1835 | } | ||
1836 | isLastEmpty = iFalse; | ||
1837 | } | ||
1838 | if (startsWith_Rangecc(line, " ")) { | ||
1839 | line.start += 4; | ||
1840 | if (!isPre) { | ||
1841 | appendCStr_String(&result, "```\n"); | ||
1842 | isPre = iTrue; | ||
1843 | } | ||
1844 | } | ||
1845 | else if (isPre) { | ||
1846 | if (!endsWith_String(&result, "\n")) { | ||
1847 | appendCStr_String(&result, "\n"); | ||
1848 | } | ||
1849 | appendCStr_String(&result, "```\n"); | ||
1850 | isPre = iFalse; | ||
1851 | } | ||
1852 | /* Check for image links. */ | ||
1853 | if (isPre) { | ||
1854 | appendRange_String(&result, line); | ||
1855 | appendCStr_String(&result, "\n"); | ||
1856 | } | ||
1857 | else { | ||
1858 | iString ln; | ||
1859 | initRange_String(&ln, line); | ||
1860 | replaceRegExp_String(&ln, iClob(new_RegExp("\\*\\*(.+?)\\*\\*", 0)), "\x1b[1m\\1\x1b[0m", NULL, NULL); | ||
1861 | replaceRegExp_String(&ln, iClob(new_RegExp("\\b\\*(.+?)\\*\\b", 0)), "\x1b[3m\\1\x1b[0m", NULL, NULL); | ||
1862 | replaceRegExp_String(&ln, iClob(new_RegExp("\\b_(.+?)_\\b", 0)), "\x1b[3m\\1\x1b[0m", NULL, NULL); | ||
1863 | replaceRegExp_String(&ln, iClob(new_RegExp("```([^`]+?)```", 0)), "\n```\n\\1\n```\n", NULL, NULL); | ||
1864 | replaceRegExp_String(&ln, namePattern, "", NULL, 0); | ||
1865 | replaceRegExp_String(&ln, imageLinkPattern, "\n=> \\2 \\1\n", NULL, NULL); | ||
1866 | replaceRegExp_String(&ln, namedLinkPattern, "\\1", addPendingNamedLink_, pendingLinks); | ||
1867 | replaceRegExp_String(&ln, linkPattern, "\\1", addPendingLink_, pendingLinks); | ||
1868 | replaceRegExp_String(&ln, iClob(new_RegExp("(?<!`)`([^`]*?)`(?!`)", 0)), "\x1b[4m\\1\x1b[0m", NULL, NULL); | ||
1869 | append_String(&result, &ln); | ||
1870 | deinit_String(&ln); | ||
1871 | } | ||
1872 | } | ||
1873 | flushPendingLinks_(pendingLinks, &d->source, &result); | ||
1874 | set_String(&d->source, &result); | ||
1875 | deinit_String(&result); | ||
1876 | } | ||
1877 | /* Replace Markdown syntax with equivalent Gemtext, where possible. */ | ||
1878 | // replaceRegExp_String(&d->source, iClob(new_RegExp("```([^`]+)```", 0)), "\n\n```\v\\1\v```\n\n"); | ||
1879 | // replaceRegExp_String(&d->source, iClob(new_RegExp("\n\\s*([0-9]+)\\.", 0)), "\n\n\\1."); /* numbered list */ | ||
1880 | replaceRegExp_String(&d->source, iClob(new_RegExp("(\\s*\n){2,}", 0)), "\n\n", NULL, NULL); /* normalize paragraph breaks */ | ||
1881 | printf("Converted:\n%s", cstr_String(&d->source)); | ||
1882 | // replaceRegExp_String(&d->source, iClob(new_RegExp("\n(?![*>#]\\s)", 0)), " "); /* normal line breaks */ | ||
1883 | // replace_String(&d->source, "\f", "\n\n"); | ||
1884 | // replace_String(&d->source, "\v", "\n"); | ||
1885 | d->format = gemini_SourceFormat; | ||
1886 | } | ||
1887 | |||
1715 | void setSource_GmDocument(iGmDocument *d, const iString *source, int width, int outsideMargin, | 1888 | void setSource_GmDocument(iGmDocument *d, const iString *source, int width, int outsideMargin, |
1716 | enum iGmDocumentUpdate updateType) { | 1889 | enum iGmDocumentUpdate updateType) { |
1890 | /* TODO: This API has been set up to allow partial/progressive updating of the content. | ||
1891 | Currently the entire source is replaced every time, though. */ | ||
1717 | // printf("[GmDocument] source update (%zu bytes), width:%d, final:%d\n", | 1892 | // printf("[GmDocument] source update (%zu bytes), width:%d, final:%d\n", |
1718 | // size_String(source), width, updateType == final_GmDocumentUpdate); | 1893 | // size_String(source), width, updateType == final_GmDocumentUpdate); |
1719 | if (size_String(source) == size_String(&d->unormSource)) { | 1894 | if (size_String(source) == size_String(&d->unormSource)) { |
@@ -1724,6 +1899,10 @@ void setSource_GmDocument(iGmDocument *d, const iString *source, int width, int | |||
1724 | set_String(&d->unormSource, source); | 1899 | set_String(&d->unormSource, source); |
1725 | /* Normalize. */ | 1900 | /* Normalize. */ |
1726 | set_String(&d->source, &d->unormSource); | 1901 | set_String(&d->source, &d->unormSource); |
1902 | if (d->format == markdown_SourceFormat) { | ||
1903 | convertMarkdownToGemtext_GmDocument_(d); | ||
1904 | set_String(&d->unormSource, &d->source); | ||
1905 | } | ||
1727 | if (isNormalized_GmDocument_(d)) { | 1906 | if (isNormalized_GmDocument_(d)) { |
1728 | normalize_GmDocument(d); | 1907 | normalize_GmDocument(d); |
1729 | } | 1908 | } |
diff --git a/src/gmutil.c b/src/gmutil.c index 5be7e198..692c1cb9 100644 --- a/src/gmutil.c +++ b/src/gmutil.c | |||
@@ -566,6 +566,12 @@ const char *mediaTypeFromFileExtension_String(const iString *d) { | |||
566 | else if (endsWithCase_String(d, ".mid")) { | 566 | else if (endsWithCase_String(d, ".mid")) { |
567 | return "audio/midi"; | 567 | return "audio/midi"; |
568 | } | 568 | } |
569 | else if (endsWithCase_String(d, ".md") || | ||
570 | endsWithCase_String(d, ".markdown") || | ||
571 | endsWithCase_String(d, ".mdown") || | ||
572 | endsWithCase_String(d, ".markdn")) { | ||
573 | return "text/markdown"; | ||
574 | } | ||
569 | else if (endsWithCase_String(d, ".txt") || | 575 | else if (endsWithCase_String(d, ".txt") || |
570 | endsWithCase_String(d, ".ini") || | 576 | endsWithCase_String(d, ".ini") || |
571 | endsWithCase_String(d, ".md") || | 577 | endsWithCase_String(d, ".md") || |
diff --git a/src/ui/documentwidget.c b/src/ui/documentwidget.c index ee669c1a..8c87ba1a 100644 --- a/src/ui/documentwidget.c +++ b/src/ui/documentwidget.c | |||
@@ -1481,6 +1481,10 @@ static void updateDocument_DocumentWidget_(iDocumentWidget *d, | |||
1481 | docFormat = gemini_SourceFormat; | 1481 | docFormat = gemini_SourceFormat; |
1482 | setRange_String(&d->sourceMime, param); | 1482 | setRange_String(&d->sourceMime, param); |
1483 | } | 1483 | } |
1484 | else if (equal_Rangecc(param, "text/markdown")) { | ||
1485 | docFormat = markdown_SourceFormat; | ||
1486 | setRange_String(&d->sourceMime, param); | ||
1487 | } | ||
1484 | else if (startsWith_Rangecc(param, "text/") || | 1488 | else if (startsWith_Rangecc(param, "text/") || |
1485 | equal_Rangecc(param, "application/json") || | 1489 | equal_Rangecc(param, "application/json") || |
1486 | equal_Rangecc(param, "application/x-pem-file") || | 1490 | equal_Rangecc(param, "application/x-pem-file") || |
diff --git a/src/ui/text.c b/src/ui/text.c index 3ed5b327..3d2cdf5d 100644 --- a/src/ui/text.c +++ b/src/ui/text.c | |||
@@ -914,6 +914,18 @@ static void finishRun_AttributedText_(iAttributedText *d, iAttributedRun *run, i | |||
914 | run->logical.start = endAt; | 914 | run->logical.start = endAt; |
915 | } | 915 | } |
916 | 916 | ||
917 | static iFont *withStyle_Font_(const iFont *d, enum iFontStyle styleId) { | ||
918 | const int fontId = (fontId_Text_(d) / maxVariants_Fonts) * maxVariants_Fonts; | ||
919 | const int sizeId = sizeId_Text_(d); | ||
920 | return font_Text_(FONT_ID(fontId, styleId, sizeId)); | ||
921 | } | ||
922 | |||
923 | static iFont *withFontId_Font_(const iFont *d, enum iFontId fontId) { | ||
924 | const int styleId = styleId_Text_(d); | ||
925 | const int sizeId = sizeId_Text_(d); | ||
926 | return font_Text_(FONT_ID(fontId, styleId, sizeId)); | ||
927 | } | ||
928 | |||
917 | static void prepare_AttributedText_(iAttributedText *d, int overrideBaseDir, iChar overrideChar) { | 929 | static void prepare_AttributedText_(iAttributedText *d, int overrideBaseDir, iChar overrideChar) { |
918 | iAssert(isEmpty_Array(&d->runs)); | 930 | iAssert(isEmpty_Array(&d->runs)); |
919 | size_t length = 0; | 931 | size_t length = 0; |
@@ -976,6 +988,7 @@ static void prepare_AttributedText_(iAttributedText *d, int overrideBaseDir, iCh | |||
976 | const iChar * logicalText = constData_Array(&d->logical); | 988 | const iChar * logicalText = constData_Array(&d->logical); |
977 | iBool isRTL = d->isBaseRTL; | 989 | iBool isRTL = d->isBaseRTL; |
978 | int numNonSpace = 0; | 990 | int numNonSpace = 0; |
991 | iFont * activeFont = d->font; | ||
979 | for (int pos = 0; pos < length; pos++) { | 992 | for (int pos = 0; pos < length; pos++) { |
980 | const iChar ch = logicalText[pos]; | 993 | const iChar ch = logicalText[pos]; |
981 | #if defined (LAGRANGE_ENABLE_FRIBIDI) | 994 | #if defined (LAGRANGE_ENABLE_FRIBIDI) |
@@ -1004,8 +1017,23 @@ static void prepare_AttributedText_(iAttributedText *d, int overrideBaseDir, iCh | |||
1004 | init_RegExpMatch(&m); | 1017 | init_RegExpMatch(&m); |
1005 | if (match_RegExp(activeText_->ansiEscape, srcPos, d->source.end - srcPos, &m)) { | 1018 | if (match_RegExp(activeText_->ansiEscape, srcPos, d->source.end - srcPos, &m)) { |
1006 | finishRun_AttributedText_(d, &run, pos - 1); | 1019 | finishRun_AttributedText_(d, &run, pos - 1); |
1007 | run.fgColor = ansiForeground_Color(capturedRange_RegExpMatch(&m, 1), | 1020 | const iRangecc sequence = capturedRange_RegExpMatch(&m, 1); |
1008 | tmParagraph_ColorId); | 1021 | if (equal_Rangecc(sequence, "1")) { |
1022 | activeFont = withStyle_Font_(activeFont, bold_FontStyle); | ||
1023 | } | ||
1024 | else if (equal_Rangecc(sequence, "3")) { | ||
1025 | activeFont = withStyle_Font_(activeFont, italic_FontStyle); | ||
1026 | } | ||
1027 | else if (equal_Rangecc(sequence, "4")) { | ||
1028 | activeFont = withFontId_Font_(activeFont, monospace_FontId); | ||
1029 | } | ||
1030 | else if (equal_Rangecc(sequence, "0")) { | ||
1031 | activeFont = d->font; /* restore original */ | ||
1032 | run.fgColor = d->fgColor; | ||
1033 | } | ||
1034 | else { | ||
1035 | run.fgColor = ansiForeground_Color(sequence, tmParagraph_ColorId); | ||
1036 | } | ||
1009 | pos += length_Rangecc(capturedRange_RegExpMatch(&m, 0)); | 1037 | pos += length_Rangecc(capturedRange_RegExpMatch(&m, 0)); |
1010 | iAssert(logToSource[pos] == end_RegExpMatch(&m) - d->source.start); | 1038 | iAssert(logToSource[pos] == end_RegExpMatch(&m) - d->source.start); |
1011 | /* The run continues after the escape sequence. */ | 1039 | /* The run continues after the escape sequence. */ |
@@ -1047,7 +1075,7 @@ static void prepare_AttributedText_(iAttributedText *d, int overrideBaseDir, iCh | |||
1047 | } | 1075 | } |
1048 | continue; | 1076 | continue; |
1049 | } | 1077 | } |
1050 | iFont *currentFont = d->font; | 1078 | iFont *currentFont = activeFont; |
1051 | if (run.font->fontSpec->flags & arabic_FontSpecFlag && isPunct_Char(ch)) { | 1079 | if (run.font->fontSpec->flags & arabic_FontSpecFlag && isPunct_Char(ch)) { |
1052 | currentFont = run.font; /* remain as Arabic for whitespace */ | 1080 | currentFont = run.font; /* remain as Arabic for whitespace */ |
1053 | } | 1081 | } |
@@ -1766,6 +1794,9 @@ static iRect run_Font_(iFont *d, const iRunArgs *args) { | |||
1766 | orig.y + yCursor - yOffset + glyph->font->baseline + glyph->d[hoff].y, | 1794 | orig.y + yCursor - yOffset + glyph->font->baseline + glyph->d[hoff].y, |
1767 | glyph->rect[hoff].size.x, | 1795 | glyph->rect[hoff].size.x, |
1768 | glyph->rect[hoff].size.y }; | 1796 | glyph->rect[hoff].size.y }; |
1797 | if (run->font->height < d->height) { | ||
1798 | dst.y += d->baseline - run->font->baseline; | ||
1799 | } | ||
1769 | if (mode & visualFlag_RunMode) { | 1800 | if (mode & visualFlag_RunMode) { |
1770 | if (isEmpty_Rect(bounds)) { | 1801 | if (isEmpty_Rect(bounds)) { |
1771 | bounds = init_Rect(dst.x, dst.y, dst.w, dst.h); | 1802 | bounds = init_Rect(dst.x, dst.y, dst.w, dst.h); |