#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ********************************************************************************
# Copyright © 2017-2020 jianglin
# File Name: document.py
# Author: jianglin
# Email: mail@honmaple.com
# Created: 2018-02-26 11:44:43 (CST)
# Last Update: Wednesday 2020-08-19 12:00:03 (CST)
# Description:
# ********************************************************************************
import re
from hashlib import sha1
from textwrap import dedent
from .inline import Blankline, Hr, InlineText
from .src import highlight as src_highlight
# Line-level patterns used to classify Org source lines.  In most of them
# group 1 captures the leading indentation of the line.

# Drawers: ":NAME:" opens, ":END:" closes, ":KEY: value" is a property line.
DRAWER_BEGIN_REGEXP = re.compile(r"^(\s*):(\S+):\s*$")
DRAWER_END_REGEXP = re.compile(r"^(\s*):END:\s*$")
DRAWER_PROPERTY_REGEXP = re.compile(r"^(\s*):(\S+):(\s+(.*)$|$)")

# Blocks: "#+BEGIN_<name> <params>" / "#+END_<name>" (case-insensitive).
BLOCK_BEGIN_REGEXP = re.compile(r"(?i)^(\s*)#\+BEGIN_(\w+)(.*)")
BLOCK_END_REGEXP = re.compile(r"(?i)^(\s*)#\+END_(\w+)")
# "#+RESULTS:" marker and the ": text" lines matched as its content.
BLOCK_RESULT_REGEXP = re.compile(r"(?i)^(\s*)#\+RESULTS:")
BLOCK_RESULT_CONTENT_REGEXP = re.compile(r"(?:^|\s+):(\s+(.*)|$)")

# Tables.  A separator row consists solely of "|", "+" and "-" characters.
# The hyphen is escaped on purpose: an unescaped "[+-|]" is the character
# *range* from "+" to "|", which also contains digits and letters and made
# space-free data rows such as "|a|b|" look like separators.
TABLE_SEP_REGEXP = re.compile(r"^(\s*)(\|[+\-|]*)\s*$")
TABLE_ROW_REGEXP = re.compile(r"^(\s*)(\|.*)")
# Alignment cookie: "<l>", "<c>" or "<r>".
TABLE_ALIGN_REGEXP = re.compile(r"^<(l|c|r)>$")

# Lists: bullets are "+", "*" or "-"; ordered items are digits or a single
# letter followed by "." or ")".
LIST_DESCRIPTIVE_REGEXP = re.compile(r"^(\s*)([+*-])\s+(.*)::(\s|$)")
LIST_UNORDER_REGEXP = re.compile(r"^(\s*)([+*-])(\s+(.*)|$)")
LIST_ORDER_REGEXP = re.compile(r"^(\s*)(([0-9]+|[a-zA-Z])[.)])(\s+(.*)|$)")
# Checkbox cookie "[ ]", "[X]" or "[-]" followed by whitespace.
LIST_STATUS_REGEXP = re.compile(r"\[( |X|-)\]\s")
# Splits a line into leading whitespace and the remaining text.
LIST_LEVEL_REGEXP = re.compile(r"(\s*)(.+)$")

# Headline groups: 1 = stars, 2 = optional first word, 3 = optional "[#X]"
# priority, 4 = remaining text, 5 = optional ":tag1:tag2:" body.
HEADLINE_REGEXP = re.compile(
    r"^(\*+)(?:\s+(.+?))?(?:\s+\[#(.+)\])?(\s+.*?)(?:\s+:(.+):)?$")

# "#+KEY: value" lines, "#..." lines, and ":name value" attribute pairs.
KEYWORD_REGEXP = re.compile(r"^(\s*)#\+([^:]+):(\s+(.*)|$)")
COMMENT_REGEXP = re.compile(r"^(\s*)#(.*)")
ATTRIBUTE_REGEXP = re.compile(r"(?:^|\s+)(:[-\w]+)\s+(.*)$")

# Default value of Headline's ``todo_keywords`` parameter.
TODO_KEYWORDS = ("DONE", "TODO")
def string_split(s, sep):
    """Split ``s`` on ``sep``; a falsy ``s`` (``None`` or ``""``) gives ``[]``."""
    return s.split(sep) if s else []
class Parser(object):
    """Base class for document nodes; also turns raw lines into child nodes.

    A node keeps the source lines it was created from, the list of parsed
    children, and an optional ``element`` format template that
    ``to_html()`` applies to the rendered children.
    """

    def __init__(self, content=""):
        """Store ``content`` split into lines; parsing happens lazily."""
        self.lines = content.splitlines()
        # Width of the leading whitespace of the source line.  Assigned by
        # the parent's preparse() and compared in add_child() for lists.
        self.level = 0
        # Format template applied to the joined child HTML; "" means none.
        self.element = ""
        self.children = []
        # Both flags are handed to InlineText by inlinetext().
        self.escape = True
        self.needparse = True
        # Node kinds tried, in this order, by parse(); every name maps to a
        # ``parse_<name>`` method.
        self.parsed_nodes = (
            "blankline",
            "headline",
            "table",
            "list",
            "drawer",
            "block",
            "block_result",
            "keyword",
            "hr",
        )

    def first_child(self):
        """Return the first child, or ``None`` when there are no children."""
        if not self.children:
            return None
        return self.children[0]

    def last_child(self):
        """Return the last child, or ``None`` when there are no children."""
        if not self.children:
            return None
        return self.children[-1]

    def add_child(self, node):
        """Attach ``node``, merging it into the previous child where it fits.

        Depending on the kind of the most recent child, ``node`` is handed
        to that child (headline content, further table/list parts, more text
        of a paragraph) instead of becoming a direct child of ``self``.
        """
        last = self.last_child()

        if self.is_headline(last):
            if self.is_properties(node):
                last.properties = node
                return
            if not self.is_headline(node):
                last.add_child(node)
                return
            # Only a headline with more stars nests below the previous one.
            if self.is_headline(node) and node.stars > last.stars:
                last.add_child(node)
                return

        if self.is_table(last):
            if self.is_table(node):
                last.add_child(node)
                return

        if self.is_list(last):
            if self.is_blankline(node):
                last.add_child(node)
                return
            # Anything indented deeper than the list belongs to the list.
            if node.level > last.level:
                last.add_child(node)
                return
            if self.is_list(node) and node.level == last.level:
                last.add_child(node)
                return

        if self.is_keyword(last):
            if self.is_table(node):
                # Remember the keyword line that directly precedes a table;
                # the table is still appended below.
                node.keyword = last

        if self.is_paragraph(last):
            if self.is_inlinetext(node):
                last.add_child(node)
                return

        if self.is_inlinetext(node):
            # Loose inline text starts a new paragraph.
            self.children.append(self.paragraph(node))
            return

        self.children.append(node)

    # The is_* helpers return a truthy value only when ``child`` is truthy
    # and an instance of the named node class.

    def is_keyword(self, child):
        return child and isinstance(child, Keyword)

    def is_headline(self, child):
        return child and isinstance(child, Headline)

    def is_list(self, child):
        return child and isinstance(child, List)

    def is_table(self, child):
        return child and isinstance(child, Table)

    def is_src(self, child):
        return child and isinstance(child, (Src, Example))

    def is_inlinetext(self, child):
        return child and isinstance(child, InlineText)

    def is_blankline(self, child):
        return child and isinstance(child, Blankline)

    def is_paragraph(self, child):
        return child and isinstance(child, Paragraph)

    def is_properties(self, child):
        return child and isinstance(child, Properties)

    def inlinetext(self, text):
        """Wrap ``text`` in an InlineText using this node's parse/escape flags."""
        return InlineText(text, self.needparse, self.escape)

    def paragraph(self, node):
        """Return a new Paragraph whose only child is ``node``."""
        para = Paragraph()
        para.add_child(node)
        return para

    def _parse_paired(self, cls, index, lines):
        """Parse a construct that needs an explicit end line.

        ``cls.match`` is tried on ``lines[index]``; following lines are then
        scanned until ``node.matchend`` accepts one.  The lines strictly
        between start and end are fed to ``node.preparse``.

        Returns ``(node, end_index)`` on success, or ``(None, index)`` when
        the first line does not match or no end line is found.
        """
        node = cls.match(lines[index])
        if not node:
            return None, index

        for num in range(index + 1, len(lines)):
            if node.matchend(num, lines):
                node.preparse(lines[index + 1:num])
                return node, num
        return None, index

    def _parse_nopaired(self, cls, index, lines):
        """Parse a construct that may run to the end of the input.

        Like ``_parse_paired`` but a missing end line is not an error: the
        body then extends to the last line.  Returns ``(node, stop_index)``
        where ``stop_index`` is the line ``matchend`` accepted, or
        ``len(lines)``; ``(None, index)`` when the first line does not match.
        """
        node = cls.match(lines[index])
        if not node:
            return None, index

        end = len(lines)
        num = index + 1
        while num < end and not node.matchend(num, lines):
            num += 1
        node.preparse(lines[index + 1:num])
        return node, num

    # Each parse_<name> returns ``(node_or_None, index)`` where ``index`` is
    # the last line consumed; preparse() continues on the line after it.

    def parse_headline(self, index, lines):
        return Headline.match(lines[index]), index

    def parse_list(self, index, lines):
        return List.match(lines[index]), index

    def parse_table(self, index, lines):
        return self._parse_nopaired(Table, index, lines)

    def parse_drawer(self, index, lines):
        return self._parse_paired(Drawer, index, lines)

    def parse_block(self, index, lines):
        return self._parse_paired(Block, index, lines)

    def parse_block_result(self, index, lines):
        return self._parse_paired(BlockResult, index, lines)

    def parse_blankline(self, index, lines):
        return Blankline.match(lines[index]), index

    def parse_keyword(self, index, lines):
        return Keyword.match(lines[index]), index

    def parse_hr(self, index, lines):
        return Hr.match(lines[index]), index

    def parse_inlinetext(self, index, lines):
        return self.inlinetext(lines[index]), index

    def parse(self, index, lines):
        """Return ``(node, index)`` for the construct starting at ``index``.

        The kinds listed in ``self.parsed_nodes`` are tried in order; names
        without a matching ``parse_<name>`` method are skipped.  When none
        of them produces a node the line is treated as inline text.
        """
        for name in self.parsed_nodes:
            handler = getattr(self, "parse_" + name, None)
            if handler is None:
                continue
            node, num = handler(index, lines)
            if node:
                return node, num
        return self.parse_inlinetext(index, lines)

    def preparse(self, lines):
        """Parse ``lines`` into nodes and attach them through ``add_child``."""
        index = 0
        while index < len(lines):
            line = lines[index]
            node, index = self.parse(index, lines)
            if node:
                # Indentation is the *leading* whitespace only; stripping
                # both ends would count trailing blanks as indentation.
                node.level = len(line) - len(line.lstrip())
                self.add_child(node)
            index += 1

    def to_html(self):
        """Render the children to HTML, parsing ``self.lines`` first if needed.

        Children whose rendering is empty are dropped; the rest are joined
        with newlines and, when ``self.element`` is set, passed to
        ``self.element.format``.
        """
        if not self.children and self.lines:
            self.preparse(self.lines)

        rendered = (child.to_html() for child in self.children)
        text = "\n".join([content for content in rendered if content])
        if self.element:
            return self.element.format(text)
        return text

    def __str__(self):
        inner = ','.join([str(child) for child in self.children])
        return self.__class__.__name__ + '(' + inner + ')'

    def __repr__(self):
        return self.__str__()
class Headline(Parser):
def __init__(
        self,
        title,
        stars=1,
        keyword=None,
        priority=None,
        tags=None,
        todo_keywords=TODO_KEYWORDS):
    """Create a headline node.

    :param title: headline text.
    :param stars: number of leading ``*`` characters.
    :param keyword: keyword stored on the node (may be empty or ``None``).
    :param priority: priority cookie value (may be empty or ``None``).
    :param tags: list of tag strings; ``None`` means no tags.
    :param todo_keywords: keywords stored on the node as
        ``self.todo_keywords``.
    """
    super(Headline, self).__init__()
    self.title = title
    self.stars = stars
    self.keyword = keyword
    self.priority = priority
    # A fresh list per instance: a mutable default in the signature would
    # be shared by every headline created without explicit tags.
    self.tags = [] if tags is None else tags
    # Filled in later when a Properties node follows this headline.
    self.properties = None
    self.todo_keywords = todo_keywords
@classmethod
def match(cls, line):
    """Build a headline from ``line``; return ``None`` if it is not one.

    The optional first word captured by ``HEADLINE_REGEXP`` is kept as part
    of the title, except when it has the ``[#X]`` shape and no priority was
    captured separately: then it supplies the priority instead.  The
    keyword handed to the constructor is always the empty string.
    """
    found = HEADLINE_REGEXP.match(line)
    if found is None:
        return None

    head = found[2] or ""
    priority = found[3] or ""

    # The first word is really the priority cookie when it looks like
    # "[#X]" and the dedicated priority group captured nothing.
    head_is_cookie = (
        head and not priority and len(head) >= 4 and head.startswith("[#"))
    if head_is_cookie:
        priority, head = head[2:-1], ""

    return cls(
        head + found[4],
        len(found[1]),
        "",
        priority,
        string_split(found[5], ":"),
    )
def id(self):
    """Return the anchor id of this headline.

    The ``CUSTOM_ID`` entry of ``self.properties`` is used when present;
    otherwise the id is ``org-`` plus the first ten hex digits of the SHA-1
    of the title.
    """
    digest = sha1(self.title.encode()).hexdigest()
    fallback = 'org-{0}'.format(digest[:10])
    if not self.properties:
        return fallback
    return self.properties.get("CUSTOM_ID", fallback)
def toc(self):
    """Return the one-line rendering of this headline.

    The pieces are concatenated in the order keyword, priority, rendered
    title, tags; surrounding whitespace is stripped from the result.
    """
    pieces = []
    if self.keyword:
        pieces.append("{0}".format(self.keyword))
    if self.priority:
        pieces.append("{0}".format(self.priority))
    pieces.append(self.inlinetext(self.title).to_html())
    pieces.extend("{0}".format(tag) for tag in self.tags)
    return "".join(pieces).strip()
def to_html(self):
b = "
\n{0}\n
" def add_child(self, node): self.children.append(node) def to_html(self): children = [child.to_html() for child in self.children] return self.element.format("\n{0}\n" class Export(Block): def __init__(self, language="", params=""): super(Export, self).__init__("export", params) self.language = language self.escape = self.language.upper() != "HTML" self.parsed_nodes = () def to_html(self): if not self.escape: return super(Export, self).to_html() return "" class Src(Block): def __init__(self, language="", params="", highlight=False): super(Src, self).__init__("src", params) self.language = language self.highlight_code = highlight self.element = "
\n{1}\n" self.needparse = False self.escape = False self.parsed_nodes = () def add_child(self, node): self.children.append(node) def highlight(self, language, text): return src_highlight(language, text) def to_html(self): text = "\n".join([child.to_html() for child in self.children]) if self.highlight_code: return self.highlight(self.language, dedent(text)) if not self.language: return "
\n{0}\n".format(dedent(text)) return self.element.format(self.language, dedent(text)) class Example(Src): def __init__(self, params="", highlight=False): super(Example, self).__init__("example", params, highlight) self.name = "example" class BlockResult(Parser): def __init__(self): super(BlockResult, self).__init__() self.element = "
\n{0}\n" @classmethod def match(cls, line): match = BLOCK_RESULT_REGEXP.match(line) if not match: return return cls() def matchend(self, index, lines): return not BLOCK_RESULT_CONTENT_REGEXP.match(lines[index]) def parse(self, index, lines): match = BLOCK_RESULT_CONTENT_REGEXP.match(lines[index]) return self.inlinetext(match[2]), index class ListItem(Parser): def __init__(self, status=None, checkbox="HTML"): super(ListItem, self).__init__() self.status = status self.checkbox = checkbox self.element = "
\n{0}\n
" self.parsed_nodes = () def add_child(self, node): self.children.append(node) class Section(Parser): def __init__(self, headline): super(Section, self).__init__() self.headline = headline @property def stars(self): return self.headline.stars def add_child(self, node): last = self.last_child() if not last: self.children.append(node) return if node.stars > last.stars: last.add_child(node) return self.children.append(node) def to_html(self): text = "