# -*- coding: utf-8 -*- try: import _jsre as re except: import re import random import time letters = 'abcdefghijklmnopqrstuvwxyz' letters += letters.upper()+'0123456789' class URL: def __init__(self,src): elts = src.split(maxsplit=1) self.href = elts[0] self.alt = '' if len(elts)==2: alt = elts[1] if alt[0]=='"' and alt[-1]=='"':self.alt=alt[1:-1] elif alt[0]=="'" and alt[-1]=="'":self.alt=alt[1:-1] elif alt[0]=="(" and alt[-1]==")":self.alt=alt[1:-1] class CodeBlock: def __init__(self,line): self.lines = [line] if line.startswith("```") and len(line)>3: self.info = line[3:] else: self.info = None def to_html(self): if self.lines[0].startswith("`"): self.lines.pop(0) res = escape('\n'.join(self.lines)) res = unmark(res) _class = self.info or "marked" res = '

%s

\n' %(_class, res) return res,[] class HtmlBlock: def __init__(self, src): self.src = src def to_html(self): return self.src class Marked: def __init__(self, line=''): self.line = line self.children = [] def to_html(self): return apply_markdown(self.line) # get references refs = {} ref_pattern = r"^\[(.*)\]:\s+(.*)" def mark(src): global refs t0 = time.time() refs = {} # split source in sections # sections can be : # - a block-level HTML element (markdown syntax will not be processed) # - a script # - a span-level HTML tag (markdown syntax will be processed) # - a code block # normalise line feeds src = src.replace('\r\n','\n') # lines followed by dashes src = re.sub(r'(.*?)\n=+\n', '\n# \\1\n', src) src = re.sub(r'(.*?)\n-+\n', '\n## \\1\n', src) lines = src.split('\n')+[''] i = bq = 0 ul = ol = 0 while i in a blockquote if lines[i].startswith('>'): nb = 1 while nb': nb += 1 lines[i] = lines[i][nb:] if nb>bq: lines.insert(i,'

'*(nb-bq)) i += 1 bq = nb elif nb'*(bq-nb)) i += 1 bq = nb elif bq>0: lines.insert(i,'

'*bq) i += 1 bq = 0 # unordered lists if lines[i].strip() and lines[i].lstrip()[0] in '-+*' \ and len(lines[i].lstrip())>1 \ and lines[i].lstrip()[1]==' ' \ and (i==0 or ul or not lines[i-1].strip()): # line indentation indicates nesting level nb = 1+len(lines[i])-len(lines[i].lstrip()) lines[i] = '

'+lines[i][nb:] if nb>ul: lines.insert(i,'

'*(ul-nb)) i += 1 ul = nb elif ul and not lines[i].strip(): if i1 and nline[1]==' ': pass else: lines.insert(i,'

'*ul) i += 1 ul = 0 # ordered lists mo = re.search(r'^(\d+\.)',lines[i]) if mo: if not ol: lines.insert(i,'

'+lines[i][len(mo.groups()[0]):] ol = 1 elif ol and not lines[i].strip() and i') i += 1 ol = 0 i += 1 if ul: lines.append(''*ul) if ol: lines.append('

'*ol) if bq: lines.append(''*bq) t1 = time.time() #print('part 1', t1-t0) sections = [] scripts = [] section = Marked() i = 0 while i'): scripts.append('\n'.join(lines[i+1:j])) for k in range(i,j+1): lines[k] = '' break j += 1 i = j continue # atext header elif line.startswith('#'): level = 1 line = lines[i] while level','>') czone = czone.replace('_','_') czone = czone.replace('*','*') return czone def s_escape(mo): # used in re.sub czone = mo.string[mo.start():mo.end()] return escape(czone) def unmark(code_zone): # convert _ to _ inside inline code code_zone = code_zone.replace('_','_') return code_zone def s_unmark(mo): # convert _ to _ inside inline code code_zone = mo.string[mo.start():mo.end()] code_zone = code_zone.replace('_','_') return code_zone def apply_markdown(src): scripts = [] key = None t0 = time.time() i = 0 while i-1 and src[start_a:end_a].find('\n')==-1: link = src[start_a:end_a] rest = src[end_a+1:].lstrip() if rest and rest[0]=='(': j = 0 while True: end_href = rest.find(')',j) if end_href == -1: break if rest[end_href-1]=='\\': j = end_href+1 else: break if end_href>-1 and rest[:end_href].find('\n')==-1: tag = ''+link+'' src = src[:start_a-1]+tag+rest[end_href+1:] i = start_a+len(tag) elif rest and rest[0]=='[': j = 0 while True: end_key = rest.find(']',j) if end_key == -1: break if rest[end_key-1]=='\\': j = end_key+1 else: break if end_key>-1 and rest[:end_key].find('\n')==-1: if not key: key = link if key.lower() not in refs: raise KeyError('unknown reference %s' %key) url = refs[key.lower()] tag = ''+link+'' src = src[:start_a-1]+tag+rest[end_key+1:] i = start_a+len(tag) i += 1 t1 = time.time() #print('apply markdown 1', t1-t0) # before applying the markup with _ and *, isolate HTML tags because # they can contain these characters # We replace them temporarily by a random string rstr = ''.join(random.choice(letters) for i in range(16)) i = 0 state = None start = -1 data = '' tags = [] while i' and state is None: tags.append(src[i:j+1]) src = src[:i]+rstr+src[j+1:] i += len(rstr) break elif state=='"' or state=="'": data += src[j] elif src[j]=='\n': # if a sign < is not followed by > in the same ligne, it # is the sign "lesser than" src = src[:i]+'<'+src[i+1:] j=i+4 break j += 1 elif src[i]=='`' and i>0 and src[i-1]!='\\': # ignore the content of inline code j = i+1 while j", "&" and "_" in inline code code_pattern = r'\`(.*?)\`' src = re.sub(code_pattern,s_escape,src) # replace escaped ` _ * by HTML characters src = src.replace(r'\\`','`') src = src.replace(r'\_','_') src = src.replace(r'\*','*') # emphasis strong_patterns = [('STRONG',r'\*\*(.*?)\*\*'),('B',r'__(.*?)__')] for tag,strong_pattern in strong_patterns: src = re.sub(strong_pattern,r'<%s>\1' %(tag,tag),src) em_patterns = [('EM',r'\*(.*?)\*'),('I',r'\_(.*?)\_')] for tag,em_pattern in em_patterns: src = re.sub(em_pattern,r'<%s>\1' %(tag,tag),src) # inline code code_pattern = r'\`(.*?)\`' src = re.sub(code_pattern,r'\1',src) # restore tags while True: pos = src.rfind(rstr) if pos==-1: break repl = tags.pop() src = src[:pos]+repl+src[pos+len(rstr):] src = '

'+src+'

' t3 = time.time() #print('apply markdown 3', t3-t2) return src,scripts