Markdown Text Highlighting Performance Issues - Tkinter Markdown Text Highlighting Performance Issues - Tkinter tkinter tkinter

Markdown Text Highlighting Performance Issues - Tkinter


I don't know whether this solution improves performance, but at least it improves the syntax highlighting.

The idea is to let pygments (see its official documentation) do the job for us, using pygments.lex(text, lexer) to parse the text, where lexer is pygments' lexer for Markdown syntax. This function returns an iterable of (token, text) pairs, so I use str(token) as a tag name, e.g. the tag "Token.Generic.Strong" corresponds to bold text. To avoid configuring the tags one by one, I use one of the predefined pygments styles, which I load with the load_style() function.

Unfortunately, pygments' markdown lexer does not recognize bold-italic so I define a custom Lexer class that extends pygments' one.

import tkinter

from pygments import lex
from pygments.lexer import bygroups
from pygments.lexers.markup import MarkdownLexer
from pygments.styles import get_style_by_name
from pygments.token import Generic


class Lexer(MarkdownLexer):
    """Markdown lexer that additionally emits a token for bold-italic text."""

    # Copy every rule list so the inserts below do not mutate the rule
    # lists owned by MarkdownLexer itself.
    tokens = {key: val.copy() for key, val in MarkdownLexer.tokens.items()}
    # NOTE(review): index 2 is presumably chosen so these rules are tried
    # before the stock bold/italic rules -- confirm against the installed
    # pygments version's 'inline' state.
    # bold-italic fenced by '***'
    tokens['inline'].insert(2, (r'(\*\*\*[^* \n][^*\n]*\*\*\*)',
                                bygroups(Generic.StrongEmph)))
    # bold-italic fenced by '___'
    tokens['inline'].insert(2, (r'(\_\_\_[^_ \n][^_\n]*\_\_\_)',
                                bygroups(Generic.StrongEmph)))


def load_style(stylename):
    """Configure one text tag per token of a pygments style.

    Each token of the style named *stylename* becomes a tag on the global
    ``editor`` widget, named ``str(token)``, carrying the style's colours,
    bold/italic font flags and underline setting.  The widget's own
    background, foreground and selection colours are taken from the style
    as well.

    Returns the list of tag names that were configured, so that callers
    can later remove them from a text range.
    """
    style = get_style_by_name(stylename)
    syntax_highlighting_tags = []
    for token, opts in style.list_styles():
        kwargs = {}
        fg = opts['color']
        bg = opts['bgcolor']
        # pygments stores colours without the leading '#'.
        if fg:
            kwargs['foreground'] = '#' + fg
        if bg:
            kwargs['background'] = '#' + bg
        font = ('Monospace', 10) + tuple(
            key for key in ('bold', 'italic') if opts[key])
        kwargs['font'] = font
        kwargs['underline'] = opts['underline']
        editor.tag_configure(str(token), **kwargs)
        syntax_highlighting_tags.append(str(token))

    widget_options = {
        'bg': style.background_color,
        'selectbackground': style.highlight_color,
    }
    # A style may define no colour for Token.Text; the tag option is then
    # empty, and an empty colour is not accepted as the widget foreground,
    # so the widget's current foreground is kept in that case.
    default_fg = editor.tag_cget("Token.Text", "foreground")
    if default_fg:
        widget_options['fg'] = default_fg
    editor.configure(**widget_options)

    # The custom bold-italic token is not part of any predefined style.
    editor.tag_configure(str(Generic.StrongEmph),
                         font=('Monospace', 10, 'bold', 'italic'))
    syntax_highlighting_tags.append(str(Generic.StrongEmph))
    return syntax_highlighting_tags


def check_markdown(start='insert linestart', end='insert lineend'):
    """Re-apply syntax highlighting to the text between *start* and *end*.

    Both arguments are Tk text indices; the defaults cover the line that
    holds the insertion cursor.  Existing highlighting tags are removed
    from the range, the text is lexed, and each piece of lexed text is
    tagged with its token and all of the token's ancestors.
    """
    data = editor.get(start, end)
    # Skip leading newlines so that offsets in the lexed text line up with
    # the widget (pygments lexers strip leading newlines by default --
    # see the ``stripnl`` lexer option).
    while data and data[0] == '\n':
        start = editor.index('%s+1c' % start)
        data = data[1:]
    editor.mark_set('range_start', start)

    # clear tags
    clear_end = "range_start +%ic" % len(data)
    for t in syntax_highlighting_tags:
        editor.tag_remove(t, start, clear_end)

    # parse text: walk the marks forward one lexed chunk at a time
    for token, content in lex(data, lexer):
        editor.mark_set("range_end", "range_start + %ic" % len(content))
        # token.split() yields the token together with its parent tokens.
        for t in token.split():
            editor.tag_add(str(t), "range_start", "range_end")
        editor.mark_set("range_start", "range_end")


root = tkinter.Tk()
root.title("Markdown Text Editor")
editor = tkinter.Text(root, font="Monospace 10")
editor.pack()

lexer = Lexer()
syntax_highlighting_tags = load_style("monokai")

# bind each key release to the markdown checker function
editor.bind("<KeyRelease>", lambda event: check_markdown())

root.mainloop()

To improve performance, you can bind check_markdown() to only some keys or choose to apply the syntax highlighting only when the user changes line.


If you don't want to use an external library and keep the code simple, using re.finditer() seems faster than Text.search().

You can use a single regular expression to match all cases:

# Two alternatives, one per delimiter character: a run of 1-3 '*' (group
# "delimiter") or 1-3 '_' (group "delimiter2"), followed by a lazy run of
# text that contains no such character, closed by the same delimiter run
# via a named backreference.
regexp = re.compile(r"((?P<delimiter>\*{1,3})[^*]+?(?P=delimiter)|(?P<delimiter2>\_{1,3})[^_]+?(?P=delimiter2))")

The length of the "delimiter" group gives you the tag and the span of the match gives you where to apply the tag.

Here is the code:

import re
import tkinter

root = tkinter.Tk()
root.title("Markdown Text Editor")
editor = tkinter.Text(root)
editor.pack()

# bind each key release to the markdown checker function
# (check_markdown is looked up when the event fires, so it may be defined below)
editor.bind("<KeyRelease>", lambda event: check_markdown())

# configure markdown styles
editor.tag_config("bold", foreground="#FF0000")  # red for debugging clarity
editor.tag_config("italic", foreground="#00FF00")  # green for debugging clarity
editor.tag_config("bold-italic", foreground="#0000FF")  # blue for debugging clarity

# Matches text fenced by 1-3 '*' (group "delimiter") or 1-3 '_' (group
# "delimiter2"); the closing fence must repeat the opening one exactly.
regexp = re.compile(r"((?P<delimiter>\*{1,3})[^*]+?(?P=delimiter)|(?P<delimiter2>\_{1,3})[^_]+?(?P=delimiter2))")
tags = {1: "italic", 2: "bold", 3: "bold-italic"}  # the length of the delimiter gives the tag


def check_markdown(start_index="insert linestart", end_index="insert lineend"):
    """Re-tag the Markdown emphasis found between two text indices.

    *start_index* and *end_index* are Tk text indices; the defaults cover
    the line that holds the insertion cursor.  All emphasis tags are first
    removed from the range, then every regexp match is tagged according to
    the length of its delimiter (see ``tags``).
    """
    text = editor.get(start_index, end_index)

    # remove all tag instances
    for tag in tags.values():
        editor.tag_remove(tag, start_index, end_index)

    # loop through each match and add the corresponding tag
    for match in regexp.finditer(text):
        groupdict = match.groupdict()
        delim = groupdict["delimiter"]  # * delimiter
        if delim is None:
            delim = groupdict["delimiter2"]  # _ delimiter
        # Match offsets are relative to the fetched text, so they are
        # expressed as character offsets from start_index.
        start, end = match.span()
        editor.tag_add(tags[len(delim)],
                       f"{start_index}+{start}c",
                       f"{start_index}+{end}c")


root.mainloop()

Note that check_markdown() only works if start_index and end_index are on the same line, otherwise you need to split the text and do the search line by line.