You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

638 lines
18 KiB

2 months ago
<?php
#
#
# Parsedown Extra
# https://github.com/erusev/parsedown-extra
#
# (c) Emanuil Rusev
# http://erusev.com
#
# For the full license information, view the LICENSE file that was distributed
# with this source code.
#
#
class ParsedownExtra extends Parsedown
{
# ~
public const version = '0.8.0-beta-1';
# ~
public function __construct()
{
if (version_compare(parent::version, '1.7.1') < 0) {
throw new Exception('ParsedownExtra requires a later version of Parsedown');
}
$this->BlockTypes[':'] []= 'DefinitionList';
$this->BlockTypes['*'] []= 'Abbreviation';
# identify footnote definitions before reference definitions
array_unshift($this->BlockTypes['['], 'Footnote');
# identify footnote markers before before links
array_unshift($this->InlineTypes['['], 'FootnoteMarker');
}
#
# ~
public function text($text)
{
$Elements = $this->textElements($text);
# convert to markup
$markup = $this->elements($Elements);
# trim line breaks
$markup = trim($markup, "\n");
# merge consecutive dl elements
$markup = preg_replace('/<\/dl>\s+<dl>\s+/', '', $markup);
# add footnotes
if (isset($this->DefinitionData['Footnote'])) {
$Element = $this->buildFootnoteElement();
$markup .= "\n" . $this->element($Element);
}
return $markup;
}
#
# Blocks
#
#
# Abbreviation
protected function blockAbbreviation($Line)
{
if (preg_match('/^\*\[(.+?)\]:[ ]*(.+?)[ ]*$/', $Line['text'], $matches)) {
$this->DefinitionData['Abbreviation'][$matches[1]] = $matches[2];
$Block = array(
'hidden' => true,
);
return $Block;
}
}
#
# Footnote
protected function blockFootnote($Line)
{
if (preg_match('/^\[\^(.+?)\]:[ ]?(.*)$/', $Line['text'], $matches)) {
$Block = array(
'label' => $matches[1],
'text' => $matches[2],
'hidden' => true,
);
return $Block;
}
}
protected function blockFootnoteContinue($Line, $Block)
{
if ($Line['text'][0] === '[' and preg_match('/^\[\^(.+?)\]:/', $Line['text'])) {
return;
}
if (isset($Block['interrupted'])) {
if ($Line['indent'] >= 4) {
$Block['text'] .= "\n\n" . $Line['text'];
return $Block;
}
} else {
$Block['text'] .= "\n" . $Line['text'];
return $Block;
}
}
protected function blockFootnoteComplete($Block)
{
$this->DefinitionData['Footnote'][$Block['label']] = array(
'text' => $Block['text'],
'count' => null,
'number' => null,
);
return $Block;
}
#
# Definition List
protected function blockDefinitionList($Line, $Block)
{
if (! isset($Block) or $Block['type'] !== 'Paragraph') {
return;
}
$Element = array(
'name' => 'dl',
'elements' => array(),
);
$terms = explode("\n", $Block['element']['handler']['argument']);
foreach ($terms as $term) {
$Element['elements'] []= array(
'name' => 'dt',
'handler' => array(
'function' => 'lineElements',
'argument' => $term,
'destination' => 'elements'
),
);
}
$Block['element'] = $Element;
$Block = $this->addDdElement($Line, $Block);
return $Block;
}
protected function blockDefinitionListContinue($Line, array $Block)
{
if ($Line['text'][0] === ':') {
$Block = $this->addDdElement($Line, $Block);
return $Block;
} else {
if (isset($Block['interrupted']) and $Line['indent'] === 0) {
return;
}
if (isset($Block['interrupted'])) {
$Block['dd']['handler']['function'] = 'textElements';
$Block['dd']['handler']['argument'] .= "\n\n";
$Block['dd']['handler']['destination'] = 'elements';
unset($Block['interrupted']);
}
$text = substr($Line['body'], min($Line['indent'], 4));
$Block['dd']['handler']['argument'] .= "\n" . $text;
return $Block;
}
}
#
# Header
protected function blockHeader($Line)
{
$Block = parent::blockHeader($Line);
if (preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['handler']['argument'], $matches, PREG_OFFSET_CAPTURE)) {
$attributeString = $matches[1][0];
$Block['element']['attributes'] = $this->parseAttributeData($attributeString);
$Block['element']['handler']['argument'] = substr($Block['element']['handler']['argument'], 0, $matches[0][1]);
}
return $Block;
}
#
# Markup
protected function blockMarkup($Line)
{
if ($this->markupEscaped or $this->safeMode) {
return;
}
if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) {
$element = strtolower($matches[1]);
if (in_array($element, $this->textLevelElements)) {
return;
}
$Block = array(
'name' => $matches[1],
'depth' => 0,
'element' => array(
'rawHtml' => $Line['text'],
'autobreak' => true,
),
);
$length = strlen($matches[0]);
$remainder = substr($Line['text'], $length);
if (trim($remainder) === '') {
if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) {
$Block['closed'] = true;
$Block['void'] = true;
}
} else {
if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) {
return;
}
if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder)) {
$Block['closed'] = true;
}
}
return $Block;
}
}
protected function blockMarkupContinue($Line, array $Block)
{
if (isset($Block['closed'])) {
return;
}
if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) { # open
$Block['depth'] ++;
}
if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) { # close
if ($Block['depth'] > 0) {
$Block['depth'] --;
} else {
$Block['closed'] = true;
}
}
if (isset($Block['interrupted'])) {
$Block['element']['rawHtml'] .= "\n";
unset($Block['interrupted']);
}
$Block['element']['rawHtml'] .= "\n".$Line['body'];
return $Block;
}
protected function blockMarkupComplete($Block)
{
if (! isset($Block['void'])) {
$Block['element']['rawHtml'] = $this->processTag($Block['element']['rawHtml']);
}
return $Block;
}
#
# Setext
protected function blockSetextHeader($Line, array $Block = null)
{
$Block = parent::blockSetextHeader($Line, $Block);
if (preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['handler']['argument'], $matches, PREG_OFFSET_CAPTURE)) {
$attributeString = $matches[1][0];
$Block['element']['attributes'] = $this->parseAttributeData($attributeString);
$Block['element']['handler']['argument'] = substr($Block['element']['handler']['argument'], 0, $matches[0][1]);
}
return $Block;
}
#
# Inline Elements
#
#
# Footnote Marker
protected function inlineFootnoteMarker($Excerpt)
{
if (preg_match('/^\[\^(.+?)\]/', $Excerpt['text'], $matches)) {
$name = $matches[1];
if (! isset($this->DefinitionData['Footnote'][$name])) {
return;
}
$this->DefinitionData['Footnote'][$name]['count'] ++;
if (! isset($this->DefinitionData['Footnote'][$name]['number'])) {
$this->DefinitionData['Footnote'][$name]['number'] = ++ $this->footnoteCount; # » &
}
$Element = array(
'name' => 'sup',
'attributes' => array('id' => 'fnref'.$this->DefinitionData['Footnote'][$name]['count'].':'.$name),
'element' => array(
'name' => 'a',
'attributes' => array('href' => '#fn:'.$name, 'class' => 'footnote-ref'),
'text' => $this->DefinitionData['Footnote'][$name]['number'],
),
);
return array(
'extent' => strlen($matches[0]),
'element' => $Element,
);
}
}
private $footnoteCount = 0;
#
# Link
protected function inlineLink($Excerpt)
{
$Link = parent::inlineLink($Excerpt);
$remainder = substr($Excerpt['text'], $Link['extent']);
if (preg_match('/^[ ]*{('.$this->regexAttribute.'+)}/', $remainder, $matches)) {
$Link['element']['attributes'] += $this->parseAttributeData($matches[1]);
$Link['extent'] += strlen($matches[0]);
}
return $Link;
}
#
# ~
#
private $currentAbreviation;
private $currentMeaning;
protected function insertAbreviation(array $Element)
{
if (isset($Element['text'])) {
$Element['elements'] = self::pregReplaceElements(
'/\b'.preg_quote($this->currentAbreviation, '/').'\b/',
array(
array(
'name' => 'abbr',
'attributes' => array(
'title' => $this->currentMeaning,
),
'text' => $this->currentAbreviation,
)
),
$Element['text']
);
unset($Element['text']);
}
return $Element;
}
protected function inlineText($text)
{
$Inline = parent::inlineText($text);
if (isset($this->DefinitionData['Abbreviation'])) {
foreach ($this->DefinitionData['Abbreviation'] as $abbreviation => $meaning) {
$this->currentAbreviation = $abbreviation;
$this->currentMeaning = $meaning;
$Inline['element'] = $this->elementApplyRecursiveDepthFirst(
array($this, 'insertAbreviation'),
$Inline['element']
);
}
}
return $Inline;
}
#
# Util Methods
#
protected function addDdElement(array $Line, array $Block)
{
$text = substr($Line['text'], 1);
$text = trim($text);
unset($Block['dd']);
$Block['dd'] = array(
'name' => 'dd',
'handler' => array(
'function' => 'lineElements',
'argument' => $text,
'destination' => 'elements'
),
);
if (isset($Block['interrupted'])) {
$Block['dd']['handler']['function'] = 'textElements';
unset($Block['interrupted']);
}
$Block['element']['elements'] []= & $Block['dd'];
return $Block;
}
protected function buildFootnoteElement()
{
$Element = array(
'name' => 'div',
'attributes' => array('class' => 'footnotes'),
'elements' => array(
array('name' => 'hr'),
array(
'name' => 'ol',
'elements' => array(),
),
),
);
uasort($this->DefinitionData['Footnote'], [$this,'sortFootnotes']);
foreach ($this->DefinitionData['Footnote'] as $definitionId => $DefinitionData) {
if (! isset($DefinitionData['number'])) {
continue;
}
$text = $DefinitionData['text'];
$textElements = parent::textElements($text);
$numbers = range(1, $DefinitionData['count']);
$backLinkElements = array();
foreach ($numbers as $number) {
$backLinkElements[] = array('text' => ' ');
$backLinkElements[] = array(
'name' => 'a',
'attributes' => array(
'href' => "#fnref$number:$definitionId",
'rev' => 'footnote',
'class' => 'footnote-backref',
),
'rawHtml' => '&#8617;',
'allowRawHtmlInSafeMode' => true,
'autobreak' => false,
);
}
unset($backLinkElements[0]);
$n = count($textElements) -1;
if ($textElements[$n]['name'] === 'p') {
$backLinkElements = array_merge(
array(
array(
'rawHtml' => '&#160;',
'allowRawHtmlInSafeMode' => true,
),
),
$backLinkElements
);
unset($textElements[$n]['name']);
$textElements[$n] = array(
'name' => 'p',
'elements' => array_merge(
array($textElements[$n]),
$backLinkElements
),
);
} else {
$textElements[] = array(
'name' => 'p',
'elements' => $backLinkElements
);
}
$Element['elements'][1]['elements'] []= array(
'name' => 'li',
'attributes' => array('id' => 'fn:'.$definitionId),
'elements' => array_merge(
$textElements
),
);
}
return $Element;
}
# ~
protected function parseAttributeData($attributeString)
{
$Data = array();
$attributes = preg_split('/[ ]+/', $attributeString, - 1, PREG_SPLIT_NO_EMPTY);
foreach ($attributes as $attribute) {
if ($attribute[0] === '#') {
$Data['id'] = substr($attribute, 1);
} else { # "."
$classes []= substr($attribute, 1);
}
}
if (isset($classes)) {
$Data['class'] = implode(' ', $classes);
}
return $Data;
}
# ~
protected function processTag($elementMarkup) # recursive
{
# http://stackoverflow.com/q/1148928/200145
libxml_use_internal_errors(true);
$DOMDocument = new DOMDocument();
// Migrating away from `mb_convert_encoding($elementMarkup,
//'HTML-ENTITIES', 'UTF-8');` has caused multibyte characters like
// emojis not to be converted into entities, which is needed so that
// the `DOM` extension can properly parse the markup.
// The following line works like this: It treats the input string
// as UTF-8 and converts every Unicode character with 8 or more bits
// (= character code starting at 128 or 0x80 up to the Unicode limit
// of 0x10ffff) to an entity; the third and fourth arguments for the
// map are not needed for our use case and are set to the default values
// (no offset and a full mask)
// [http://stackoverflow.com/q/11309194/200145]
$elementMarkup = mb_encode_numericentity($elementMarkup, [0x80, 0x10ffff, 0, 0xffffff], 'UTF-8');
# Ensure that saveHTML() is not remove new line characters. New lines will be split by this character.
$DOMDocument->formatOutput = true;
# http://stackoverflow.com/q/4879946/200145
$DOMDocument->loadHTML($elementMarkup);
$DOMDocument->removeChild($DOMDocument->doctype);
$DOMDocument->replaceChild($DOMDocument->firstChild->firstChild->firstChild, $DOMDocument->firstChild);
$elementText = '';
if ($DOMDocument->documentElement->getAttribute('markdown') === '1') {
foreach ($DOMDocument->documentElement->childNodes as $Node) {
$elementText .= $DOMDocument->saveHTML($Node);
}
$DOMDocument->documentElement->removeAttribute('markdown');
$elementText = "\n".$this->text($elementText)."\n";
} else {
foreach ($DOMDocument->documentElement->childNodes as $Node) {
$nodeMarkup = $DOMDocument->saveHTML($Node);
if ($Node instanceof DOMElement and ! in_array($Node->nodeName, $this->textLevelElements)) {
$elementText .= $this->processTag($nodeMarkup);
} else {
$elementText .= $nodeMarkup;
}
}
}
# because we don't want for markup to get encoded
$DOMDocument->documentElement->nodeValue = 'placeholder\x1A';
$markup = $DOMDocument->saveHTML($DOMDocument->documentElement);
$markup = str_replace('placeholder\x1A', $elementText, $markup);
return $markup;
}
# ~
protected function sortFootnotes($A, $B) # callback
{
return $A['number'] - $B['number'];
}
#
# Fields
#
protected $regexAttribute = '(?:[#.][-\w]+[ ]*)';
}