Markdown Text Highlighting Performance Issues - Tkinter

python-3.x regex tkinter markdown tkinter-text

I don't know whether this solution improves performance, but at least it improves the syntax highlighting.

The idea is to let pygments (official documentation here) do the job for us: pygments.lex(text, lexer) parses the text, where lexer is pygments' lexer for Markdown syntax. This function returns an iterable of (token, text) pairs, so I use str(token) as a tag name; e.g. the tag "Token.Generic.Strong" corresponds to bold text. To avoid configuring the tags one by one, I use one of the predefined pygments styles, which I load with the load_style() function.

Unfortunately, pygments' markdown lexer does not recognize bold-italic so I define a custom Lexer class that extends pygments' one.

# Markdown text editor: a tkinter Text widget whose tags are driven by pygments.
import tkinter

from pygments import lex
from pygments.lexer import bygroups
from pygments.lexers.markup import MarkdownLexer
from pygments.styles import get_style_by_name
from pygments.token import Generic


# add markup for bold-italic
class Lexer(MarkdownLexer):
    """Markdown lexer extended with a bold-italic token (Generic.StrongEmph)."""

    # Copy each rule list so the insertions below do not modify the lists
    # owned by MarkdownLexer itself.
    tokens = {key: val.copy() for key, val in MarkdownLexer.tokens.items()}
    # NOTE(review): index 2 is presumably chosen so these rules are tried
    # before the plain bold/italic rules -- confirm against the installed
    # pygments version.
    # bold-italic fenced by '***'
    tokens['inline'].insert(2, (r'(\*\*\*[^* \n][^*\n]*\*\*\*)',
                                bygroups(Generic.StrongEmph)))
    # bold-italic fenced by '___'
    tokens['inline'].insert(2, (r'(\_\_\_[^_ \n][^_\n]*\_\_\_)',
                                bygroups(Generic.StrongEmph)))


def load_style(stylename):
    """Configure the editor's tags and colours from a pygments style.

    One text tag is configured per token of the style, named ``str(token)``,
    plus one tag for the custom bold-italic token. The module-level
    ``editor`` widget is modified in place.

    Args:
        stylename: name of a pygments style, passed to get_style_by_name().

    Returns:
        The list of tag names that were configured.
    """
    style = get_style_by_name(stylename)
    syntax_highlighting_tags = []
    for token, opts in style.list_styles():
        kwargs = {}
        fg = opts['color']
        bg = opts['bgcolor']
        # Colours are prefixed with '#' here; unset ones are skipped.
        if fg:
            kwargs['foreground'] = '#' + fg
        if bg:
            kwargs['background'] = '#' + bg
        font = ('Monospace', 10) + tuple(key for key in ('bold', 'italic') if opts[key])
        kwargs['font'] = font
        kwargs['underline'] = opts['underline']
        editor.tag_configure(str(token), **kwargs)
        syntax_highlighting_tags.append(str(token))

    widget_options = {
        'bg': style.background_color,
        'selectbackground': style.highlight_color,
    }
    # A style without a default text colour leaves "Token.Text" without a
    # foreground; passing that empty value to configure() raises TclError,
    # so the widget foreground is only overridden when a colour exists.
    default_fg = str(editor.tag_cget("Token.Text", "foreground"))
    if default_fg:
        widget_options['fg'] = default_fg
    editor.configure(**widget_options)

    editor.tag_configure(str(Generic.StrongEmph), font=('Monospace', 10, 'bold', 'italic'))
    syntax_highlighting_tags.append(str(Generic.StrongEmph))
    return syntax_highlighting_tags


def check_markdown(start='insert linestart', end='insert lineend'):
    """Re-apply syntax highlighting to the text between *start* and *end*.

    Args:
        start: Text index where highlighting begins (default: start of the
            line holding the insertion cursor).
        end: Text index where highlighting stops (default: end of that line).
    """
    data = editor.get(start, end)
    # Skip leading newlines, moving the start index along with them.
    # NOTE(review): presumably needed because the lexer drops leading
    # newlines, which would shift every tag -- confirm.
    while data and data[0] == '\n':
        start = editor.index('%s+1c' % start)
        data = data[1:]
    editor.mark_set('range_start', start)
    # clear tags
    for t in syntax_highlighting_tags:
        editor.tag_remove(t, start, "range_start +%ic" % len(data))
    # parse text
    for token, content in lex(data, lexer):
        # The two marks walk through the text one lexed chunk at a time.
        editor.mark_set("range_end", "range_start + %ic" % len(content))
        # token.split() lists the token type together with its parent types,
        # so each of those tags is applied to the chunk.
        for t in token.split():
            editor.tag_add(str(t), "range_start", "range_end")
        editor.mark_set("range_start", "range_end")


root = tkinter.Tk()
root.title("Markdown Text Editor")
editor = tkinter.Text(root, font="Monospace 10")
editor.pack()
lexer = Lexer()
syntax_highlighting_tags = load_style("monokai")
# bind each key Release to the markdown checker function
editor.bind("<KeyRelease>", lambda event: check_markdown())
root.mainloop()

To improve performance, you can bind check_markdown() to only some keys or choose to apply the syntax highlighting only when the user changes line.

python-3.x regex tkinter markdown tkinter-text

If you don't want to use an external library and keep the code simple, using re.finditer() seems faster than Text.search().

You can use a single regular expression to match all cases:

regexp = re.compile(r"((?P<delimiter>\*{1,3})[^*]+?(?P=delimiter)|(?P<delimiter2>\_{1,3})[^_]+?(?P=delimiter2))")

The length of the "delimiter" group gives you the tag and the span of the match gives you where to apply the tag.

Here is the code:

import re
import tkinter

root = tkinter.Tk()
root.title("Markdown Text Editor")
editor = tkinter.Text(root)
editor.pack()

# configure markdown styles
editor.tag_config("bold", foreground="#FF0000")  # red for debugging clarity
editor.tag_config("italic", foreground="#00FF00")  # green for debugging clarity
editor.tag_config("bold-italic", foreground="#0000FF")  # blue for debugging clarity

# One pattern for both fence characters; the closing fence must repeat the
# opening one through the named backreference.
regexp = re.compile(r"((?P<delimiter>\*{1,3})[^*]+?(?P=delimiter)|(?P<delimiter2>\_{1,3})[^_]+?(?P=delimiter2))")
# the length of the delimiter gives the tag
tags = {1: "italic", 2: "bold", 3: "bold-italic"}


def check_markdown(start_index="insert linestart", end_index="insert lineend"):
    """Re-tag the Markdown emphasis found between two Text indices.

    Args:
        start_index: Text index where the scan begins (default: start of the
            line holding the insertion cursor).
        end_index: Text index where the scan stops (default: end of that line).
    """
    text = editor.get(start_index, end_index)
    # remove all tag instances
    for tag_name in tags.values():
        editor.tag_remove(tag_name, start_index, end_index)
    # loop through each match and add the corresponding tag
    for found in regexp.finditer(text):
        # Exactly one of the two named groups takes part in a match:
        # "delimiter" for '*' fences, "delimiter2" for '_' fences.
        fence = found.group("delimiter") or found.group("delimiter2")
        first, last = found.span()
        editor.tag_add(tags[len(fence)], f"{start_index}+{first}c", f"{start_index}+{last}c")


# bind each key Release to the markdown checker function
editor.bind("<KeyRelease>", lambda event: check_markdown())
root.mainloop()

Note that check_markdown() only works if start_index and end_index are on the same line; otherwise, you need to split the text and do the search line by line.

CodeHunter

Markdown Text Highlighting Performance Issues - Tkinter

Recent Posts

How can I color dots in a xy scatterplot according to column value?

How to update a claim in ASP.NET Identity?

What does {0} mean when initializing an object?

Accessing members of items in a JSONArray with Java

How to log SQL statements in Spring Boot?

Powershell Get-WebSite name parameter is ignored

How to detect scroll to bottom of html element

Java synchronized method

How to test controllers with CodeIgniter?

Detect Visual Composer

Matplotlib: Specify format of floats for tick labels

Rails join a list of strings with commas and "and" before the last