Show More
Commit Description:
merge with algo and add brython files that were missing
Commit Description:
merge with algo and add brython files that were missing
References:
File last commit:
Show/Diff file:
Action:
lib/assets/Lib/browser/markdown.py
| 423 lines
| 13.2 KiB
| text/x-python
| PythonLexer
|
# -*- coding: utf-8 -*-
# Use the `_jsre` regular-expression module when it can be imported
# (presumably the JavaScript-backed engine shipped with Brython -- confirm),
# otherwise fall back to the standard library `re`.
try:
    import _jsre as re
except ImportError:
    # Only a failed import should trigger the fallback; any other error
    # raised while loading `_jsre` is a real problem and must surface.
    import re

import random
import time

# Alphabet (lower case, upper case, digits) from which apply_markdown()
# draws the random placeholder string that temporarily replaces HTML tags.
letters = 'abcdefghijklmnopqrstuvwxyz'
letters += letters.upper() + '0123456789'
class URL:
    """Target of a reference-style link definition.

    ``src`` is the text that follows ``[id]:`` on a definition line: an
    address, optionally followed by a title enclosed in double quotes,
    single quotes or parentheses.

    Attributes:
        href: the address (first whitespace-delimited token of ``src``),
            or ``''`` when ``src`` is empty or blank.
        alt: the title without its delimiters, or ``''`` when there is no
            properly delimited title.
    """

    # Accepted (opening, closing) delimiter pairs around the title.
    _TITLE_DELIMITERS = (('"', '"'), ("'", "'"), ('(', ')'))

    def __init__(self, src):
        parts = src.split(maxsplit=1)
        # A blank target yields no token at all; do not crash on it.
        self.href = parts[0] if parts else ''
        self.alt = ''
        if len(parts) == 2:
            # split(maxsplit=1) keeps trailing whitespace on the last
            # element, which would hide the closing delimiter.
            title = parts[1].strip()
            if title and (title[0], title[-1]) in self._TITLE_DELIMITERS:
                self.alt = title[1:-1]
class CodeBlock:
    """A literal code section, rendered as a ``<pre>`` element.

    ``line`` is the first line of the block: either the opening fence line
    (starting with three backticks, optionally followed by an info string)
    or the first content line of an indented block. Further lines are
    appended to ``self.lines`` by the caller.

    Attributes:
        lines: list of the block's lines; for a fenced block the first
            entry is the fence line itself.
        fenced: True when the block was opened by a backtick fence.
        info: the text following the opening fence, stripped, or None.
    """

    def __init__(self, line):
        self.lines = [line]
        self.fenced = line.startswith("```")
        info = line[3:].strip() if self.fenced else ''
        self.info = info or None

    def to_html(self):
        """Return ``(html, [])`` where html is the escaped ``<pre>`` element.

        The empty list stands for the scripts found in the section, so the
        return value has the same shape as for other section types.
        """
        # The fence line is markup, not content. Slice rather than pop so
        # that rendering does not alter self.lines and can be repeated, and
        # so that an indented block whose first line merely starts with a
        # backtick keeps that line.
        content = self.lines[1:] if self.fenced else self.lines
        res = escape('\n'.join(content))
        res = unmark(res)
        _class = self.info or "marked"
        res = '<pre class="%s">%s</pre>\n' % (_class, res)
        return res, []
class HtmlBlock:
    """Section holding raw HTML that is output exactly as it was given."""

    def __init__(self, src):
        # Keep the markup untouched; nothing is parsed or transformed here.
        self.src = src

    def to_html(self):
        """Return the stored markup unchanged (a plain string)."""
        markup = self.src
        return markup
class Marked:
    """Section of text on which span-level markdown still has to be applied.

    Attributes:
        line: the accumulated text of the section (may span several lines).
        children: list initialised empty; not used by this class itself.
    """

    def __init__(self, line=''):
        self.children = []
        self.line = line

    def to_html(self):
        """Render the section by handing its text to ``apply_markdown``."""
        text = self.line
        return apply_markdown(text)
# Reference-style link definitions found by mark(): maps the lower-cased
# reference id to the URL object built from the rest of the line.
refs = dict()
# Matches one definition line "[id]: target"; group 1 is the id, group 2
# everything after the whitespace that follows the colon.
ref_pattern = r"^\[(.*)\]:\s+(.*)"
def mark(src):
    """Convert markdown source text to HTML.

    The conversion works on the lines of ``src`` in two passes:

    1. block structure: lines starting with ``>`` are wrapped in
       ``<blockquote>``, bullet items (``-``, ``+``, ``*``) in ``<ul>`` and
       numbered items in ``<ol>``, by inserting the opening / closing tags
       as extra lines;
    2. sectioning: the lines are grouped into code blocks (indented by four
       spaces, or fenced by three backticks), scripts, headers, reference
       definitions and ordinary text, then every section is rendered.

    Side effect: the module-level ``refs`` mapping is reset and filled with
    the reference-style link definitions found in ``src``.

    Returns:
        A tuple ``(html, scripts)``: the generated HTML string and the list
        of the source texts found between ``<script ...>`` and ``</script>``
        lines (plus any scripts reported by the sections).
    """
    global refs
    refs = {}

    # normalise line feeds
    src = src.replace('\r\n', '\n')

    # a line underlined with = or - is rewritten as a "#" / "##" header
    src = re.sub(r'(.*?)\n=+\n', '\n# \\1\n', src)
    src = re.sub(r'(.*?)\n-+\n', '\n## \\1\n', src)

    lines = src.split('\n') + ['']

    # ---- pass 1: blockquotes and lists --------------------------------
    # bq / ul hold the current nesting depth, ol is a simple open flag.
    i = bq = 0
    ul = ol = 0
    while i < len(lines):

        # enclose lines starting with > in a blockquote
        if lines[i].startswith('>'):
            nb = 1
            while nb < len(lines[i]) and lines[i][nb] == '>':
                nb += 1
            lines[i] = lines[i][nb:]
            if nb > bq:
                lines.insert(i, '<blockquote>' * (nb - bq))
                i += 1
                bq = nb
            elif nb < bq:
                lines.insert(i, '</blockquote>' * (bq - nb))
                i += 1
                bq = nb
        elif bq > 0:
            lines.insert(i, '</blockquote>' * bq)
            i += 1
            bq = 0

        # unordered lists
        if lines[i].strip() and lines[i].lstrip()[0] in '-+*' \
                and len(lines[i].lstrip()) > 1 \
                and lines[i].lstrip()[1] == ' ' \
                and (i == 0 or ul or not lines[i - 1].strip()):
            # line indentation indicates nesting level
            nb = 1 + len(lines[i]) - len(lines[i].lstrip())
            lines[i] = '<li>' + lines[i][nb:]
            if nb > ul:
                lines.insert(i, '<ul>' * (nb - ul))
                i += 1
            elif nb < ul:
                lines.insert(i, '</ul>' * (ul - nb))
                i += 1
            ul = nb
        elif ul and not lines[i].strip():
            # a blank line ends the list unless the next non-indented line
            # is itself a bullet item
            if i < len(lines) - 1 and lines[i + 1].strip() \
                    and not lines[i + 1].startswith(' '):
                nline = lines[i + 1].lstrip()
                is_item = (nline[0] in '-+*' and len(nline) > 1
                           and nline[1] == ' ')
                if not is_item:
                    lines.insert(i, '</ul>' * ul)
                    i += 1
                    ul = 0

        # ordered lists
        mo = re.search(r'^(\d+\.)', lines[i])
        if mo:
            if not ol:
                lines.insert(i, '<ol>')
                i += 1
            lines[i] = '<li>' + lines[i][len(mo.groups()[0]):]
            ol = 1
        elif ol and not lines[i].strip() and i < len(lines) - 1 \
                and not lines[i + 1].startswith(' ') \
                and not re.search(r'^(\d+\.)', lines[i + 1]):
            lines.insert(i, '</ol>')
            i += 1
            ol = 0

        i += 1

    # close whatever is still open at the end of the document
    if ul:
        lines.append('</ul>' * ul)
    if ol:
        lines.append('</ol>' * ol)
    if bq:
        lines.append('</blockquote>' * bq)

    # ---- pass 2: split into sections ----------------------------------
    sections = []
    scripts = []
    section = Marked()
    # An indented code line starts with exactly this prefix, which is then
    # removed; the test and the slice must use the same width.
    code_indent = '    '

    i = 0
    while i < len(lines):
        line = lines[i]
        if line.strip() and line.startswith(code_indent):
            # indented code block
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line[len(code_indent):])
            j = i + 1
            while j < len(lines) and lines[j].startswith(code_indent):
                section.lines.append(lines[j][len(code_indent):])
                j += 1
            sections.append(section)
            section = Marked()
            i = j
            continue

        elif line.strip() and line.startswith("```"):
            # fenced code block in the style of GitHub Flavored Markdown
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line)
            j = i + 1
            while j < len(lines) and not lines[j].startswith("```"):
                section.lines.append(lines[j])
                j += 1
            sections.append(section)
            section = Marked()
            # skip the closing fence
            i = j + 1
            continue

        elif line.lower().startswith('<script'):
            if isinstance(section, Marked) and section.line:
                sections.append(section)
                section = Marked()
            j = i + 1
            while j < len(lines):
                if lines[j].lower().startswith('</script>'):
                    scripts.append('\n'.join(lines[i + 1:j]))
                    # blank the script lines so they produce no text
                    for k in range(i, j + 1):
                        lines[k] = ''
                    break
                j += 1
            # NOTE(review): when no closing </script> line exists, j equals
            # len(lines) here and the rest of the document is skipped.
            i = j
            continue

        elif line.startswith('#'):
            # "#"-style header: count the leading # signs, at most 6 since
            # HTML defines no heading level beyond H6
            level = 1
            while level < len(line) and line[level] == '#' and level < 6:
                level += 1
            if not line[level + 1:].strip():
                if level == 1:
                    # a lone "#" with no title: drop the line
                    i += 1
                    continue
                else:
                    # only # signs: the last one is taken as the title
                    lines[i] = '<H%s>%s</H%s>\n' % (level - 1, '#', level - 1)
            else:
                lines[i] = '<H%s>%s</H%s>\n' % (level, line[level + 1:], level)
            # i is deliberately not advanced: the rewritten line no longer
            # starts with "#", so the next iteration adds it to the
            # current text section.

        else:
            mo = re.search(ref_pattern, line)
            if mo is not None:
                # reference definition: store it, output nothing
                if isinstance(section, Marked) and section.line:
                    sections.append(section)
                    section = Marked()
                key = mo.groups()[0]
                value = URL(mo.groups()[1])
                refs[key.lower()] = value
            else:
                if not line.strip():
                    line = '<p></p>'
                if section.line:
                    section.line += '\n'
                section.line += line

            i += 1

    if isinstance(section, Marked) and section.line:
        sections.append(section)

    # ---- render -------------------------------------------------------
    res = ''
    for section in sections:
        mk, _scripts = section.to_html()
        res += mk
        scripts += _scripts
    return res, scripts
def escape(czone):
    """Return ``czone`` with HTML- and markdown-significant characters
    replaced by HTML entities.

    ``&``, ``<`` and ``>`` are replaced so that the text shows literally in
    HTML; ``_`` and ``*`` are replaced so that the emphasis substitutions
    applied later do not match inside code.
    """
    # '&' has to come first, otherwise the ampersands of the entities
    # produced by the other replacements would be escaped a second time.
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('_', '&#95;'),
        ('*', '&#42;'),
    )
    for char, entity in replacements:
        czone = czone.replace(char, entity)
    return czone
def s_escape(mo):
    """Replacement callback for ``re.sub``: escape the whole matched text.

    ``mo`` is the match object; the text it matched is cut out of the
    searched string and returned after going through ``escape``.
    """
    first, last = mo.start(), mo.end()
    matched = mo.string[first:last]
    return escape(matched)
def unmark(code_zone):
    """Return ``code_zone`` with every ``_`` replaced by the ``&#95;`` entity.

    Used on code so that underscores are not later taken for emphasis
    markers.
    """
    return code_zone.replace('_', '&#95;')
def s_unmark(mo):
    """Replacement callback for ``re.sub``: return the matched text with
    every ``_`` replaced by the ``&#95;`` entity.

    ``mo`` is the match object; only the text it matched is returned, so
    underscores outside the match are left alone by the substitution.
    """
    code_zone = mo.string[mo.start():mo.end()]
    return code_zone.replace('_', '&#95;')
def _find_unescaped(text, char, start=0):
    """Return the index of the first ``char`` in ``text`` at or after
    ``start`` that is not preceded by a backslash, or -1 if there is none."""
    pos = text.find(char, start)
    while pos > 0 and text[pos - 1] == '\\':
        pos = text.find(char, pos + 1)
    return pos


def apply_markdown(src):
    """Apply span-level markdown to ``src`` and wrap the result in ``<p>``.

    Steps, in order:

    1. inline links ``[text](address)`` and reference links ``[text][id]``
       (an empty id means "use the text as id") become ``<a>`` elements;
    2. HTML tags are temporarily replaced by a random placeholder so that
       the ``_`` and ``*`` they may contain are not taken for emphasis;
    3. the content of inline code is escaped, backslash-escaped markers are
       turned into entities, then strong / emphasis / inline code markup is
       converted;
    4. the HTML tags are put back.

    Returns:
        A tuple ``(html, scripts)``; ``scripts`` is always an empty list
        here and only keeps the return shape uniform across section types.

    Raises:
        KeyError: a reference link uses an id missing from ``refs``.
    """
    scripts = []

    # ---- 1. links -----------------------------------------------------
    i = 0
    while i < len(src):
        if src[i] == '[':
            start_a = i + 1
            end_a = _find_unescaped(src, ']', i)
            if end_a > -1 and '\n' not in src[start_a:end_a]:
                link = src[start_a:end_a]
                rest = src[end_a + 1:].lstrip()
                tag = None
                if rest and rest[0] == '(':
                    end_href = _find_unescaped(rest, ')')
                    if end_href > -1 and '\n' not in rest[:end_href]:
                        tag = ('<a href="' + rest[1:end_href] + '">'
                               + link + '</a>')
                        tail = rest[end_href + 1:]
                elif rest and rest[0] == '[':
                    end_key = _find_unescaped(rest, ']')
                    if end_key > -1 and '\n' not in rest[:end_key]:
                        # The id is what stands between the second pair of
                        # brackets; it is read anew for every link so that
                        # one link's id is never reused for the next.
                        key = rest[1:end_key] or link
                        if key.lower() not in refs:
                            raise KeyError('unknown reference %s' % key)
                        url = refs[key.lower()]
                        # refs holds URL objects; accept a plain string too
                        href = getattr(url, 'href', url)
                        tag = '<a href="' + href + '">' + link + '</a>'
                        tail = rest[end_key + 1:]
                if tag is not None:
                    src = src[:start_a - 1] + tag + tail
                    # resume right after the inserted tag, so that a link
                    # immediately following this one is not skipped
                    i = start_a - 1 + len(tag)
                    continue
        i += 1

    # ---- 2. isolate HTML tags -----------------------------------------
    # Tags can contain _ and *, so they are replaced by a random string
    # while the emphasis markup is processed.
    rstr = ''.join(random.choice(letters) for _ in range(16))
    tags = []
    i = 0
    while i < len(src):
        if src[i] == '<':
            # quote character of the attribute value being scanned, if any;
            # reset for each tag so an unterminated quote cannot leak into
            # the next one
            state = None
            j = i + 1
            while j < len(src):
                char = src[j]
                if char == '"' or char == "'":
                    if state == char and src[j - 1] != '\\':
                        state = None
                    elif state is None:
                        state = char
                    # otherwise: the other kind of quote inside a value
                elif char == '>' and state is None:
                    tags.append(src[i:j + 1])
                    src = src[:i] + rstr + src[j + 1:]
                    # with the increment at the end of the outer loop, i
                    # lands on the character right after the placeholder
                    i += len(rstr) - 1
                    break
                elif char == '\n' and state is None:
                    # a < that is not followed by > on the same line is
                    # the "less than" sign
                    src = src[:i] + '&lt;' + src[i + 1:]
                    break
                j += 1
        elif src[i] == '`' and (i == 0 or src[i - 1] != '\\'):
            # ignore the content of inline code
            j = i + 1
            while j < len(src):
                if src[j] == '`' and src[j - 1] != '\\':
                    break
                j += 1
            i = j
        i += 1

    # ---- 3. inline markup ---------------------------------------------
    # escape "<", ">", "&", "_" and "*" in inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, s_escape, src)

    # replace escaped ` _ * by HTML entities so that they are displayed
    # literally instead of acting as markup
    # NOTE(review): the first pattern is two backslashes followed by a
    # backtick, unlike the single backslash used for _ and * -- confirm
    # that this is intended.
    src = src.replace(r'\\`', '&#96;')
    src = src.replace(r'\_', '&#95;')
    src = src.replace(r'\*', '&#42;')

    # emphasis: the double markers must be handled before the single ones
    strong_patterns = [('STRONG', r'\*\*(.*?)\*\*'), ('B', r'__(.*?)__')]
    for tag, strong_pattern in strong_patterns:
        src = re.sub(strong_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    em_patterns = [('EM', r'\*(.*?)\*'), ('I', r'\_(.*?)\_')]
    for tag, em_pattern in em_patterns:
        src = re.sub(em_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    # inline code
    src = re.sub(code_pattern, r'<code>\1</code>', src)

    # ---- 4. restore tags ----------------------------------------------
    # Placeholders are replaced from the last one to the first, matching
    # the order in which tags.pop() hands the tags back.
    while tags:
        pos = src.rfind(rstr)
        if pos == -1:
            break
        src = src[:pos] + tags.pop() + src[pos + len(rstr):]

    src = '<p>' + src + '</p>'
    return src, scripts