#
# Original SmartyPants
# Copyright (c) 2003-2004 John Gruber
#
or tags.
$prev_token_last_char = ""; # This is a cheat, used to get some context
# for one-character tokens that consist of
# just a quote char. What we do is remember
# the last character of the previous text
# token, to use as context to curl single-
# character quote tokens correctly.
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
# Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match('@<(/?)(?:'.$this->tags_to_skip.')[\s>]@', $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
} else {
$t = $cur_token[1];
$last_char = substr($t, -1); # Remember last char of this token before processing.
if (! $in_pre) {
$t = $this->educate($t, $prev_token_last_char);
}
$prev_token_last_char = $last_char;
$result .= $t;
}
}
return $result;
}
function decodeEntitiesInConfiguration() {
    #
    # Utility function that converts entities in the output configuration
    # variables to UTF-8 characters. The properties are overwritten in place.
    #
    # The flags and target encoding are passed explicitly: when omitted,
    # html_entity_decode() falls back to the `default_charset` ini setting
    # (ISO-8859-1 on old PHP versions) and to default flags that differ
    # between PHP versions, so the result would not reliably be UTF-8.
    #
    $output_config_vars = array(
        'smart_doublequote_open',
        'smart_doublequote_close',
        'smart_singlequote_open',
        'smart_singlequote_close',
        'backtick_doublequote_open',
        'backtick_doublequote_close',
        'backtick_singlequote_open',
        'backtick_singlequote_close',
        'em_dash',
        'en_dash',
        'ellipsis',
    );
    foreach ($output_config_vars as $var) {
        $this->$var = html_entity_decode($this->$var, ENT_QUOTES, 'UTF-8');
    }
}
protected function educate($t, $prev_token_last_char) {
    #
    # Parameters: $t                     Text of one token that is not a tag.
    #             $prev_token_last_char  Last character of the previous text
    #                                    token; gives context when $t is a
    #                                    lone quote character.
    #
    # Returns: $t after applying the transformations enabled by the
    #          convert_quot, do_dashes, do_ellipses, do_backticks, do_quotes
    #          and do_stupefy properties, in that order. Backslash escapes
    #          are always processed first.
    #
    $t = $this->processEscapes($t);

    if ($this->convert_quot) {
        # Turn the &quot; entity back into a plain double quote so that it
        # takes part in the quote education below.
        $t = str_replace('&quot;', '"', $t);
    }

    if ($this->do_dashes) {
        if ($this->do_dashes == 1) $t = $this->educateDashes($t);
        if ($this->do_dashes == 2) $t = $this->educateDashesOldSchool($t);
        if ($this->do_dashes == 3) $t = $this->educateDashesOldSchoolInverted($t);
    }

    if ($this->do_ellipses) $t = $this->educateEllipses($t);

    # Note: backticks need to be processed before quotes.
    if ($this->do_backticks) {
        $t = $this->educateBackticks($t);
        if ($this->do_backticks == 2) $t = $this->educateSingleBackticks($t);
    }

    if ($this->do_quotes) {
        if ($t == "'") {
            # Special case: single-character ' token. It closes a quote when
            # the previous text token ended with a non-whitespace character.
            if (preg_match('/\S/', $prev_token_last_char)) {
                $t = $this->smart_singlequote_close;
            }
            else {
                $t = $this->smart_singlequote_open;
            }
        }
        else if ($t == '"') {
            # Special case: single-character " token, same rule as above.
            if (preg_match('/\S/', $prev_token_last_char)) {
                $t = $this->smart_doublequote_close;
            }
            else {
                $t = $this->smart_doublequote_open;
            }
        }
        else {
            # Normal case:
            $t = $this->educateQuotes($t);
        }
    }

    if ($this->do_stupefy) $t = $this->stupefyEntities($t);

    return $t;
}
protected function educateQuotes($_) {
    #
    # Parameter: String.
    #
    # Returns: The string, with straight quotes replaced by the configured
    #          "educated" (curly) opening and closing quote strings.
    #
    # Example input:  "Isn't this fun?"
    # Example output: &#8220;Isn&#8217;t this fun?&#8221;
    #                 (when the smart_* quote properties hold these entities)
    #
    $dq_open  = $this->smart_doublequote_open;
    $dq_close = $this->smart_doublequote_close;
    $sq_open  = $this->smart_singlequote_open;
    $sq_close = $this->smart_singlequote_close;

    # Make our own "punctuation" character class, because the POSIX-style
    # [:PUNCT:] is only available in Perl 5.6 or later:
    $punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";

    # Special case if the very first character is a quote
    # followed by punctuation at a non-word-break. Close the quotes by brute force:
    $_ = preg_replace(
        array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"),
        array($sq_close,                 $dq_close), $_);

    # Special case for double sets of quotes, e.g.:
    #   He said, "'Quoted' words in a larger quote."
    $_ = preg_replace(
        array("/\"'(?=\w)/",     "/'\"(?=\w)/"),
        array($dq_open.$sq_open, $sq_open.$dq_open), $_);

    # Special case for decade abbreviations (the '80s):
    $_ = preg_replace("/'(?=\\d{2}s)/", $sq_close, $_);

    $close_class = '[^\ \t\r\n\[\{\(\-]';
    $dec_dashes = '&\#8211;|&\#8212;';

    # Get most opening single quotes. Every alternative in the group must
    # consume something: an empty alternative would let the pattern match any
    # quote that precedes a word character, including apostrophes.
    $_ = preg_replace("{
        (
            \\s          |  # a whitespace char, or
            &nbsp;       |  # a non-breaking space entity, or
            --           |  # dashes, or
            &[mn]dash;   |  # named dash entities
            $dec_dashes  |  # or decimal entities
            &\\#x201[34];   # or hex
        )
        '                   # the quote
        (?=\\w)             # followed by a word character
        }x", '\1'.$sq_open, $_);

    # Single closing quotes:
    $_ = preg_replace("{
        ($close_class)?
        '
        (?(1)|              # If group 1 captured, then do nothing;
          (?=\\s | s\\b)    # otherwise, positive lookahead for a whitespace
        )                   # char or an 's' at a word ending position. This
                            # is a special case to handle something like:
                            # \"Custer's Last Stand.\"
        }xi", '\1'.$sq_close, $_);

    # Any remaining single quotes should be opening ones:
    $_ = str_replace("'", $sq_open, $_);

    # Get most opening double quotes:
    $_ = preg_replace("{
        (
            \\s          |  # a whitespace char, or
            &nbsp;       |  # a non-breaking space entity, or
            --           |  # dashes, or
            &[mn]dash;   |  # named dash entities
            $dec_dashes  |  # or decimal entities
            &\\#x201[34];   # or hex
        )
        \"                  # the quote
        (?=\\w)             # followed by a word character
        }x", '\1'.$dq_open, $_);

    # Double closing quotes:
    $_ = preg_replace("{
        ($close_class)?
        \"
        (?(1)|(?=\\s))      # If group 1 captured, then do nothing;
                            # if not, then make sure the next char is whitespace.
        }x", '\1'.$dq_close, $_);

    # Any remaining quotes should be opening ones.
    $_ = str_replace('"', $dq_open, $_);

    return $_;
}
protected function educateBackticks($_) {
    #
    # Parameter: String.
    # Returns:   The string, with ``backticks'' -style double quotes replaced:
    #            each `` becomes the backtick_doublequote_open property and
    #            each '' becomes the backtick_doublequote_close property.
    #
    # Example input: ``Isn't this fun?''
    #   Only the doubled characters at both ends are replaced; the single
    #   apostrophe in "Isn't" is left untouched by this method.
    #
    $markers = array("``", "''");
    $curly   = array(
        $this->backtick_doublequote_open,
        $this->backtick_doublequote_close,
    );
    return str_replace($markers, $curly, $_);
}
protected function educateSingleBackticks($_) {
    #
    # Parameter: String.
    # Returns:   The string, with `backticks' -style single quotes replaced:
    #            each ` becomes the backtick_singlequote_open property and
    #            each ' becomes the backtick_singlequote_close property.
    #
    # Example input: `Isn't this fun?'
    #   The backtick becomes the opening string; both apostrophes (the one in
    #   "Isn't" and the final one) become the closing string.
    #
    $markers = array("`", "'");
    $curly   = array(
        $this->backtick_singlequote_open,
        $this->backtick_singlequote_close,
    );
    return str_replace($markers, $curly, $_);
}
protected function educateDashes($_) {
    #
    # Parameter: String.
    #
    # Returns:   The string, with every "--" replaced by the value of the
    #            em_dash property.
    #
    return str_replace('--', $this->em_dash, $_);
}
protected function educateDashesOldSchool($_) {
    #
    # Parameter: String.
    #
    # Returns:   The string, with each "---" replaced by the em_dash property
    #            and each remaining "--" replaced by the en_dash property.
    #
    # The triple dash is listed first so it is consumed before the double
    # dash pattern gets a chance to match part of it.
    $dash_runs    = array("---", "--");
    $replacements = array($this->em_dash, $this->en_dash);
    return str_replace($dash_runs, $replacements, $_);
}
protected function educateDashesOldSchoolInverted($_) {
    #
    # Parameter: String.
    #
    # Returns:   The string, with each "---" replaced by the en_dash property
    #            and each remaining "--" replaced by the em_dash property.
    #
    # Rationale, from the original author: unlike the mapping used by
    # educateDashesOldSchool(), this one is compatible with existing entries
    # written before SmartyPants 1.1, back when "--" was only used for
    # em-dashes. Also, em-dashes are more common than en-dashes, so it makes
    # sense that their shortcut is the shorter one to type. (Thanks to Aaron
    # Swartz for the idea.)
    #
    # The triple dash is listed first so it is consumed before the double
    # dash pattern gets a chance to match part of it.
    $dash_runs    = array("---", "--");
    $replacements = array($this->en_dash, $this->em_dash);
    return str_replace($dash_runs, $replacements, $_);
}
protected function educateEllipses($_) {
    #
    # Parameter: String.
    # Returns:   The string, with each "..." replaced by the value of the
    #            ellipsis property. The spaced form ". . ." is replaced too.
    #
    # Example input: Huh...?
    #   The three dots are replaced by the ellipsis property; the rest of
    #   the text is unchanged.
    #
    $dot_runs = array("...", ". . .");
    return str_replace($dot_runs, $this->ellipsis, $_);
}
protected function stupefyEntities($_) {
    #
    # Parameter: String.
    # Returns:   The string, with each SmartyPants HTML entity translated to
    #            its ASCII counterpart. The equivalent literal UTF-8
    #            characters are translated as well.
    #
    # Example input:  &#8220;Hello &#8212; world.&#8221;
    # Example output: "Hello -- world."
    #
    # The UTF-8 characters are written as byte escapes so this code does not
    # depend on the encoding of the source file.
    #
    #                       entity      UTF-8 character
    # en-dash
    $_ = str_replace(array('&#8211;', "\xE2\x80\x93"), '-', $_);
    # em-dash
    $_ = str_replace(array('&#8212;', "\xE2\x80\x94"), '--', $_);

    # single quote open and close
    $_ = str_replace(
        array('&#8216;', '&#8217;', "\xE2\x80\x98", "\xE2\x80\x99"),
        "'", $_);

    # double quote open and close
    $_ = str_replace(
        array('&#8220;', '&#8221;', "\xE2\x80\x9C", "\xE2\x80\x9D"),
        '"', $_);

    # ellipsis
    $_ = str_replace(array('&#8230;', "\xE2\x80\xA6"), '...', $_);

    return $_;
}
protected function processEscapes($_) {
    #
    # Parameter: String.
    # Returns:   The string, after processing the following backslash
    #            escape sequences. This is useful if you want to force a "dumb"
    #            quote or other character to appear.
    #
    #            Escape  Value
    #            ------  -----
    #            \\      &#92;
    #            \"      &#34;
    #            \'      &#39;
    #            \.      &#46;
    #            \-      &#45;
    #            \`      &#96;
    #
    # The values are numeric character references rather than the literal
    # characters: a literal quote, dot, dash or backtick would be picked up
    # again by the quote/dash/ellipsis/backtick replacements that run on the
    # same text afterwards.
    #
    $_ = str_replace(
        array('\\\\',  '\"',    "\'",    '\.',    '\-',    '\`'),
        array('&#92;', '&#34;', '&#39;', '&#46;', '&#45;', '&#96;'), $_);

    return $_;
}
protected function tokenizeHTML($str) {
    #
    # Parameter: String containing HTML markup.
    # Returns:   An array of the tokens comprising the input
    #            string. Each token is either a tag (possibly with nested
    #            tags contained therein, such as <a href="<MTFoo>">), or a
    #            run of text between tags. Each element of the array is a
    #            two-element array; the first is either 'tag' or 'text';
    #            the second is the actual value. Empty runs of text (for
    #            example between two adjacent tags) produce no token.
    #
    #
    # Regular expression derived from the _tokenize() subroutine in
    # Brad Choate's MTRegex plugin.
    #
    $match = '(?s:<!--.*?-->)|'.  # comment
             '(?s:<\?.*?\?>)|'.   # processing instruction
                                  # regular tags
             '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';

    # With PREG_SPLIT_DELIM_CAPTURE the result alternates strictly: even
    # offsets hold the text between matches, odd offsets hold the matched
    # markup itself.
    $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $tokens = array();
    foreach ($parts as $index => $part) {
        if ($index % 2) {
            $tokens[] = array('tag', $part);
        }
        else if ($part !== '') {
            $tokens[] = array('text', $part);
        }
    }
    return $tokens;
}
}