# Source code for evennia.utils.text2html

"""
ANSI -> html converter

Credit for original idea and implementation
goes to Muhammad Alkarouri and his
snippet #577349 on http://code.activestate.com.

(extensively modified by Griatch 2010)
"""

import re
from html import escape as html_escape
from .ansi import *


# Escape-sequence templates for xterm256 colors. The "%s" slot is filled with
# the color index when TextToHTMLparser builds its lookup tables
# (XTERM256_FG for the "color-NNN" classes, XTERM256_BG for "bgcolor-NNN").

XTERM256_FG = "\033[38;5;%sm"
XTERM256_BG = "\033[48;5;%sm"


class TextToHTMLparser(object):
    """
    This class describes a parser for converting from ANSI to html.

    The conversion is regex-driven: `parse` first runs the text through
    `parse_ansi` and then applies a fixed sequence of substitutions that
    turn escape sequences into `<span class="...">` / `<strong>` / `<a>`
    markup. Colors are emitted as css class names ("color-NNN" and
    "bgcolor-NNN"), leaving the actual styling to the client.

    """

    # number of output characters a single tab expands to (see `sub_text`)
    tabstop = 4

    # mapping html color name <-> ansi code.
    hilite = ANSI_HILITE
    unhilite = ANSI_UNHILITE  # this will be stripped (see `re_bold`)
    normal = ANSI_NORMAL  # also stripped (see `re_color`)
    underline = ANSI_UNDERLINE
    blink = ANSI_BLINK
    inverse = ANSI_INVERSE  # rendered through the "inverse" css class

    # (css class, ansi sequence) pairs for foreground colors: the 16 basic
    # colors followed by xterm256 indices 16-255.
    colorcodes = [
        ("color-000", unhilite + ANSI_BLACK),  # pure black
        ("color-001", unhilite + ANSI_RED),
        ("color-002", unhilite + ANSI_GREEN),
        ("color-003", unhilite + ANSI_YELLOW),
        ("color-004", unhilite + ANSI_BLUE),
        ("color-005", unhilite + ANSI_MAGENTA),
        ("color-006", unhilite + ANSI_CYAN),
        ("color-007", unhilite + ANSI_WHITE),  # light grey
        ("color-008", hilite + ANSI_BLACK),  # dark grey
        ("color-009", hilite + ANSI_RED),
        ("color-010", hilite + ANSI_GREEN),
        ("color-011", hilite + ANSI_YELLOW),
        ("color-012", hilite + ANSI_BLUE),
        ("color-013", hilite + ANSI_MAGENTA),
        ("color-014", hilite + ANSI_CYAN),
        ("color-015", hilite + ANSI_WHITE),  # pure white
    ] + [("color-%03i" % (i + 16), XTERM256_FG % ("%i" % (i + 16))) for i in range(240)]

    # (css class, ansi sequence) pairs for background colors, same layout.
    colorback = [
        ("bgcolor-000", ANSI_BACK_BLACK),  # pure black
        ("bgcolor-001", ANSI_BACK_RED),
        ("bgcolor-002", ANSI_BACK_GREEN),
        ("bgcolor-003", ANSI_BACK_YELLOW),
        ("bgcolor-004", ANSI_BACK_BLUE),
        ("bgcolor-005", ANSI_BACK_MAGENTA),
        ("bgcolor-006", ANSI_BACK_CYAN),
        ("bgcolor-007", ANSI_BACK_WHITE),  # light grey
        ("bgcolor-008", hilite + ANSI_BACK_BLACK),  # dark grey
        ("bgcolor-009", hilite + ANSI_BACK_RED),
        ("bgcolor-010", hilite + ANSI_BACK_GREEN),
        ("bgcolor-011", hilite + ANSI_BACK_YELLOW),
        ("bgcolor-012", hilite + ANSI_BACK_BLUE),
        ("bgcolor-013", hilite + ANSI_BACK_MAGENTA),
        ("bgcolor-014", hilite + ANSI_BACK_CYAN),
        ("bgcolor-015", hilite + ANSI_BACK_WHITE),  # pure white
    ] + [("bgcolor-%03i" % (i + 16), XTERM256_BG % ("%i" % (i + 16))) for i in range(240)]

    # Lookup tables keyed by the raw (unescaped) ansi sequence, since they
    # are queried with text captured by the regexes below.
    fg_colormap = {code: clr for clr, code in colorcodes}
    bg_colormap = {code: clr for clr, code in colorback}

    # create stop markers
    # NOTE: "\\[" is spelled out because "\[" is an invalid escape sequence
    # in a non-raw literal; the resulting pattern text is unchanged.
    fgstop = "(?:\033\\[1m|\033\\[22m){0,1}\033\\[3[0-8].*?m|\033\\[0m|$"
    bgstop = "(?:\033\\[1m|\033\\[22m){0,1}\033\\[4[0-8].*?m|\033\\[0m|$"
    # bgstop[:-2] drops the trailing "|$" before the fg alternatives follow
    bgfgstop = bgstop[:-2] + r"(\s*)" + fgstop

    fgstart = "((?:\033\\[1m|\033\\[22m){0,1}\033\\[3[0-8].*?m)"
    bgstart = "((?:\033\\[1m|\033\\[22m){0,1}\033\\[4[0-8].*?m)"
    bgfgstart = bgstart + r"(\s*)" + "((?:\033\\[1m|\033\\[22m){0,1}\033\\[[3-4][0-8].*?m){0,1}"

    # extract color markers, tagging the start marker and the text marked
    re_fgs = re.compile(fgstart + "(.*?)(?=" + fgstop + ")")
    re_bgs = re.compile(bgstart + "(.*?)(?=" + bgstop + ")")
    re_bgfg = re.compile(bgfgstart + "(.*?)(?=" + bgfgstop + ")")

    re_normal = re.compile(normal.replace("[", r"\["))
    re_hilite = re.compile("(?:%s)(.*)(?=%s|%s)" % (hilite.replace("[", r"\["), fgstop, bgstop))
    re_unhilite = re.compile("(?:%s)(.*)(?=%s|%s)" % (unhilite.replace("[", r"\["), fgstop, bgstop))
    re_uline = re.compile("(?:%s)(.*?)(?=%s|%s)" % (underline.replace("[", r"\["), fgstop, bgstop))
    re_blink = re.compile("(?:%s)(.*?)(?=%s|%s)" % (blink.replace("[", r"\["), fgstop, bgstop))
    re_inverse = re.compile("(?:%s)(.*?)(?=%s|%s)" % (inverse.replace("[", r"\["), fgstop, bgstop))

    # One pass over html-special characters, tabs, spaces and line endings;
    # the named group that matched selects the replacement in `sub_text`.
    re_string = re.compile(
        r"(?P<htmlchars>[<&>])|(?P<tab>[\t]+)|(?P<space> +)|"
        r"(?P<spacestart>^ )|(?P<lineend>\r\n|\r|\n)",
        re.S | re.M | re.I,
    )
    re_dblspace = re.compile(r" {2,}", re.M)
    re_url = re.compile(
        r'(?<!=")((?:ftp|www|https?)\W+(?:(?!\.(?:\s|$)|&\w+;)[^"\',;$*^\\(){}<>\[\]\s])+)(\.(?:\s|$)|&\w+;|)'
    )
    re_mxplink = re.compile(r"\|lc(.*?)\|lt(.*?)\|le", re.DOTALL)
    re_mxpurl = re.compile(r"\|lu(.*?)\|lt(.*?)\|le", re.DOTALL)

    def _sub_bgfg(self, colormatch):
        """
        Build the html for one `re_bgfg` match (a background code that is
        optionally followed by a foreground code).

        Args:
            colormatch (re.Matchobject): Match with the groups
                (bgcode, prespace, fgcode, text, postspace).

        Returns:
            text (str): One span, or two nested spans if a second code
                was matched. Unknown codes get the class "err".

        """
        bgcode, prespace, fgcode, text, postspace = colormatch.groups()
        # one non-breaking space per captured whitespace character
        prepad = "&nbsp;" * len(prespace) if prespace else ""
        postpad = "&nbsp;" * len(postspace) if postspace else ""
        # each code is looked up in its own table first, then in the other
        outer_class = self.bg_colormap.get(bgcode, self.fg_colormap.get(bgcode, "err"))
        # NOTE(review): the post-space padding is emitted *before* the text,
        # exactly as the original did - confirm this ordering is intended.
        if not fgcode:
            return r"""<span class="%s">%s%s%s</span>""" % (outer_class, prepad, postpad, text)
        inner_class = self.fg_colormap.get(fgcode, self.bg_colormap.get(fgcode, "err"))
        return r"""<span class="%s"><span class="%s">%s%s%s</span></span>""" % (
            outer_class,
            inner_class,
            prepad,
            postpad,
            text,
        )

    def _sub_fg(self, colormatch):
        """
        Wrap the text of one `re_fgs` match in a foreground-color span.

        Args:
            colormatch (re.Matchobject): Match with the groups (code, text).

        Returns:
            text (str): The span; unknown codes get the class "err".

        """
        code, text = colormatch.groups()
        return r"""<span class="%s">%s</span>""" % (self.fg_colormap.get(code, "err"), text)

    def _sub_bg(self, colormatch):
        """
        Wrap the text of one `re_bgs` match in a background-color span.

        Args:
            colormatch (re.Matchobject): Match with the groups (code, text).

        Returns:
            text (str): The span; unknown codes get the class "err".

        """
        code, text = colormatch.groups()
        return r"""<span class="%s">%s</span>""" % (self.bg_colormap.get(code, "err"), text)

    def re_color(self, text):
        """
        Replace ansi colors with html color class names.  Let the
        client choose how it will display colors, if it wishes to.

        Args:
            text (str): the string with color to replace.

        Returns:
            text (str): Re-colored text.

        """
        # combined bg(+fg) runs first, then the remaining single codes
        text = self.re_bgfg.sub(self._sub_bgfg, text)
        text = self.re_fgs.sub(self._sub_fg, text)
        text = self.re_bgs.sub(self._sub_bg, text)
        # the "normal" (reset) code has no html counterpart; drop it
        text = self.re_normal.sub("", text)
        return text

    def re_bold(self, text):
        """
        Wrap hilighted text in <strong> and remove unhilight codes.

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        text = self.re_hilite.sub(r"<strong>\1</strong>", text)
        # strip unhilite - there is no equivalent in css.
        return self.re_unhilite.sub(r"\1", text)

    def re_underline(self, text):
        """
        Replace ansi underline with html underline class name.

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        return self.re_uline.sub(r'<span class="underline">\1</span>', text)

    def re_blinking(self, text):
        """
        Replace ansi blink with custom blink css class

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        return self.re_blink.sub(r'<span class="blink">\1</span>', text)

    def re_inversing(self, text):
        """
        Replace ansi inverse with custom inverse css class

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        return self.re_inverse.sub(r'<span class="inverse">\1</span>', text)

    def remove_bells(self, text):
        """
        Remove bell characters (character code 7).

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        return text.replace("\07", "")

    def remove_backspaces(self, text):
        """
        Remove backspaces and "erase line" escape sequences.

        Each backspace (character code 8) is removed together with the
        character in front of it, and every ESC[K sequence is removed.

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        backspace_or_eol = r"(.\010)|(\033\[K)"
        # Deliberately one substitution per pass: after "x<BS>" is removed, a
        # following backspace has to see the character that now precedes it.
        n = 1
        while n > 0:
            text, n = re.subn(backspace_or_eol, "", text, count=1)
        return text

    def convert_linebreaks(self, text):
        """
        Extra method for cleaning linebreaks

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        return text.replace("\n", r"<br>")

    def convert_urls(self, text):
        """
        Replace urls (http://...) by valid HTML.

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        # -> added target to output prevent the web browser from attempting to
        # change pages (and losing our webclient session).
        return self.re_url.sub(r'<a href="\1" target="_blank">\1</a>\2', text)

    def re_double_space(self, text):
        """
        HTML will swallow any normal space after the first, so if any slipped
        through we must make sure to replace them with " &nbsp;"

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        return self.re_dblspace.sub(self.sub_dblspace, text)

    def sub_mxp_links(self, match):
        """
        Helper method to be passed to re.sub,
        replaces MXP command links (the lc/lt/le markers) with HTML code.

        Args:
            match (re.Matchobject): Match for substitution, with the
                groups (cmd, text).

        Returns:
            text (str): Processed text.

        """
        # NOTE(review): `parse` has always referenced this method but the
        # class did not define it, so every parse() call failed with
        # AttributeError. The markup below (an anchor whose onclick calls the
        # web client's Evennia.msg) follows the upstream project - confirm it
        # matches the client-side API in use.
        cmd, text = [grp.replace('"', "\\&quot;") for grp in match.groups()]
        template = (
            r"""<a id="mxplink" href="#" """
            """onclick="Evennia.msg(&quot;text&quot;,[&quot;{cmd}&quot;],{{}});"""
            """return false;">{text}</a>"""
        )
        return template.format(cmd=cmd, text=text)

    def sub_mxp_urls(self, match):
        """
        Helper method to be passed to re.sub,
        replaces MXP url links (the lu/lt/le markers) with HTML code.

        Args:
            match (re.Matchobject): Match for substitution, with the
                groups (url, text).

        Returns:
            text (str): Processed text.

        """
        # double quotes would end the href attribute early; replace them
        url, text = [grp.replace('"', "\\&quot;") for grp in match.groups()]
        val = (
            r"""<a id="mxplink" href="{url}" target="_blank">{text}</a>""".format(url=url, text=text)
        )
        return val

    def sub_text(self, match):
        """
        Helper method to be passed to re.sub,
        for handling all substitutions.

        Args:
            match (re.Matchobject): Match for substitution.

        Returns:
            text (str): Processed text.

        """
        cdict = match.groupdict()
        if cdict["htmlchars"]:
            return html_escape(cdict["htmlchars"])
        if cdict["lineend"]:
            return "<br>"
        if cdict["tab"]:
            # each tab becomes one real space plus (tabstop - 1) &nbsp;
            return cdict["tab"].replace("\t", " " + "&nbsp;" * (self.tabstop - 1))
        if cdict["space"] or cdict["spacestart"]:
            # fall back to "spacestart" so a match of that group alone can
            # never hand None to len()
            text = cdict["space"] or cdict["spacestart"]
            # keep the first space breakable, make the rest non-breaking
            return " " if len(text) == 1 else " " + text[1:].replace(" ", "&nbsp;")
        return None

    def sub_dblspace(self, match):
        """
        Helper method to be passed to re.sub, cleans up double-spaces.

        Args:
            match (re.Matchobject): Match for substitution.

        Returns:
            text (str): One space followed by one &nbsp; for every
                further matched character.

        """
        return " " + "&nbsp;" * (len(match.group()) - 1)

    def parse(self, text, strip_ansi=False):
        """
        Main access function, converts a text containing ANSI codes
        into html statements.

        Args:
            text (str): Text to process.
            strip_ansi (bool, optional): Passed on to `parse_ansi`.

        Returns:
            text (str): Parsed text.

        """
        # parse everything to ansi first
        text = parse_ansi(text, strip_ansi=strip_ansi, xterm256=True, mxp=True)
        # convert all ansi to html; the order of the steps is significant
        result = self.re_string.sub(self.sub_text, text)
        result = self.re_mxplink.sub(self.sub_mxp_links, result)
        result = self.re_mxpurl.sub(self.sub_mxp_urls, result)
        result = self.re_color(result)
        result = self.re_bold(result)
        result = self.re_underline(result)
        result = self.re_blinking(result)
        result = self.re_inversing(result)
        result = self.remove_bells(result)
        result = self.convert_linebreaks(result)
        result = self.remove_backspaces(result)
        result = self.convert_urls(result)
        result = self.re_double_space(result)
        return result


# Module-level parser instance; parse_html() uses it as its default `parser`.
HTML_PARSER = TextToHTMLparser()


#
# Access function
#


def parse_html(string, strip_ansi=False, parser=HTML_PARSER):
    """
    Convert a string containing ANSI markup into html.

    Args:
        string (str): Text to convert.
        strip_ansi (bool, optional): Handed unchanged to `parser.parse`.
        parser (TextToHTMLparser, optional): Object whose `parse` method
            performs the conversion. Defaults to the module-level
            `HTML_PARSER` instance.

    Returns:
        text (str): The value returned by `parser.parse`.

    """
    converted = parser.parse(string, strip_ansi=strip_ansi)
    return converted