NL2P operation completely rewritten; it now works from front to back according to tags

This commit is contained in:
Stephan Brunker 2017-12-12 22:57:57 +01:00 committed by onli
parent b837adde3c
commit 43b9f607d3

View File

@ -269,16 +269,23 @@ class serendipity_event_nl2br extends serendipity_event
$element = $temp['element'];
if ($p_tags) {
// NL2P OPERATION
$this->isolationtags = $isolate;
$text = $eventData[$element];
if (!empty($text)) {
//Standardize line endings to Unix
$text = str_replace(array("\r\n", "\r"), "\n", $text);
if ($element == 'body' && isset($eventData['extended'])) {
//move newlines from body to extended
//move newlines from body to extended
if ($element == 'body' && isset($eventData['extended']))
{
$eventData['extended'] = str_repeat("\n",strspn($text,"\n",-1)) . $eventData['extended'];
$text = rtrim($text,"\n");
}
$eventData[$element] = $this->nl2p($text, $element);
$eventData[$element] = $this->nl2p($text);
}
// NL2BR OPERATION
} else if ($isolate) {
$eventData[$element] = $this->isolate($eventData[$element], '~[<\[](' . implode('|', $isolate) . ').*?[>\]].*?[<\[]/\1[>\]]~si');
$eventData[$element] = nl2br($eventData[$element]);
@ -325,17 +332,17 @@ class serendipity_event_nl2br extends serendipity_event
/* nl2br plugin start */
p.whitelinebottom {
p.wl_bottom {
margin-top: 0em;
margin-bottom: 1em;
}
p.whitelinetop {
p.wl_top {
margin-top: 1em;
margin-bottom: 0em;
}
p.whitelinetopbottom {
p.wl_top_bottom {
margin-top: 1em;
margin-bottom: 1em;
}
@ -387,215 +394,427 @@ p.break {
}
/** ====================================
* NL2P OPERATION
* ====================================
*/
/**
* Insert <p class="whiteline" at paragraphs ending with two newlines
* Insert <p class="break" at paragraphs ending with one or no nl
* Insert <p class="whitelinetop" at the first paragraph if starting with a nl
* Insert <p class="whitelinetopbottom" if the first paragraph is ending with two newlines
* @param string text
* @param boolean complex operations (not necessary when text is flat)
* @return string
*/
function nl2p(&$text, $element='any', $complex=true)
{
//check if string starts with a newline (extended only)
$startnl = ($element =='extended' && strspn($text,"\n")) ? true : false;
//trim whitespaces and line breaks
$text = trim($text);
//split into array
$text = str_split($text);
// following w3.org, these elements close p elements automatically:
var $block_elements = array('table','ul','ol','pre', 'dl',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
'menu', 'section',
'address', 'article', 'aside', 'fieldset', 'footer',
'form', 'header', 'hgroup', 'hr', 'main', 'nav', 'p'
);
$big_p = '<p class="whiteline">';
$small_p = '<p class="break">';
var $nested_block_elements = array('div','table','blockquote');
var $singleton_elements = array('area', 'br', 'col', 'command', 'embed', 'hr',
'img', 'input', 'keygen', 'link', 'param', 'source',
'track', 'wbr', '!--'
);
$i = count($text) - 1;
$whiteline = false;
//main operation: convert \n to big_p and small_p
while ($i > 0) {
//search next /n enclosing text, starting at $i-1
$i = $this->next_nl_block($i, $text);
if ($i == 0) { //no newlines left
break;
} elseif ($whiteline == true) {
$text[$i] = '</p>' . $big_p;
} else {
$text[$i] = '</p>' . $small_p;
}
//look ahead for next paragraph class
if ($text[$i-1] === "\n") {
$whiteline = true;
$i--;
} else {
$whiteline = false;
}
}
if ($whiteline && $startnl) {
$start_tag = '<p class="whitelinetopbottom">';
} elseif ($startnl) {
$start_tag = '<p class="whitelinetop">';
} elseif ($whiteline) {
$start_tag = $big_p;
} else {
$start_tag = $small_p;
}
if ($complex) {
$textstring = $this->tidy_block_elements($text);
$textstring = $this->formate_block_elements($textstring);
$textstring = $this->isolate_block_elements($textstring);
$textstring = $start_tag . $textstring . '</p>';
return $this->clean_code($textstring);
}
return $start_tag . implode($text) . '</p>';
}
var $allowed_p_parents = array('blockquote', 'td', 'div', 'article', 'aside', 'dd',
'details', 'dl', 'dt', 'footer', 'header', 'summary'
);
/**
 * Strip empty paragraph pairs from the markup.
 * A pair is empty when the opening <p> (plain or with one of the known
 * classes) is directly followed by its closing </p>. According to the
 * original note these pairs are a by-product of isolate_block_elements.
 * @param mixed text  a string, or an array of pieces that is joined first
 * @return string
 */
function clean_code ($text)
{
    // accept both the character/piece array and an already joined string
    $markup = is_array($text) ? implode($text) : $text;

    $empty_paragraphs = array(
        '<p class="whiteline"></p>',
        '<p class="break"></p>',
        '<p class="whitelinetop"></p>',
        '<p class="whitelinetopbottom"></p>',
        '<p></p>',
    );

    return str_replace($empty_paragraphs, "", $markup);
}
//paragraphs aren't allowed in these inline elements -> p closes these elements:
var $inline_elements = array('b', 'big', 'i', 'small', 'tt', 'abbr',
'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong',
'samp', 'var', 'a', 'bdo', 'br', 'map', 'object',
'q', 'script', 'span', 'sub', 'sup', 'button',
'label', 'select', 'textarea'
);
/**
 * Undo paragraph markup: closing </p> tags are dropped, opening <p> tags
 * (plain, "whiteline" or "break" class) are turned back into a newline.
 * @param string text
 * @return string
 */
function purge_p($text)
{
    // first pass: remove every closing tag without leaving a trace
    $without_closers = str_replace('</p>', "", $text);

    // second pass: each listed tag becomes a single newline; '</p>' stays in
    // the list because the first pass can join fragments into a new '</p>'
    $paragraph_tags = array(
        '<p class="whiteline">',
        '<p class="break">',
        '<p>',
        '</p>',
    );

    return str_replace($paragraph_tags, "\n", $without_closers);
}
const P_TOP = '<p class="wl_top">';
const P_BOTTOM = '<p class="wl_bottom">';
const P_TOP_BOTTOM = '<p class="wl_top_bottom">';
const P_BREAK = '<p class="break">';
const P_END = '</p>';
/**
* Use nl2p on text within blockelements, useful e.g. with blockquotes
* @param array text
* @return string
*/
function formate_block_elements($textstring)
{
$block_elements = array('<blockquote');
foreach ($block_elements as $start_tag) {
$end_tag = $this->end_tags($start_tag);
//first see if block-element really exists
$start_tag_position = strpos($textstring, $start_tag);
while ($start_tag_position !== false) {
$start_tag_end = strpos($textstring, '>', $start_tag_position)+1;
$blocktext = $this->get_string_till($textstring, $end_tag, $start_tag_end);
$blocktext_length = strlen($blocktext);
$formatted_blocktext = $this->nl2p($blocktext);
//insert formatted_blocktext into old blockelement
$textstring = substr_replace($textstring, $formatted_blocktext, $start_tag_end, $blocktext_length);
function nl2p($text)
{
//homogenize tags
$text = $this->tag_clean($text);
//delete isolation tags from other arrays
if ($this->isolationtags)
{
$this->block_elements = array_diff($this->block_elements,$this->isolationtags);
$this->allowed_p_parents = array_diff($this->allowed_p_parents,$this->isolationtags);
$this->nested_block_elements = array_diff($this->nested_block_elements,$this->isolationtags);
$this->inline_elements = array_diff($this->inline_elements,$this->isolationtags);
$this->singleton_elements = array_diff($this->singleton_elements,$this->isolationtags);
}
else { $this->isolationtags = array(); }
//next blockelement
$start_tag_position = strpos($textstring, $start_tag, $start_tag_end+strlen($formatted_blocktext));
}
}
return $textstring;
}
return $this->blocktag_nl2p($text);
}
/**
* Make sure none of these block_elements are within a <p>
* @param string text
* @return string
*/
function isolate_block_elements($textstring)
{
$block_elements = array('<table','<ul','<ol','<pre', '<dir', '<dl',
'<h1', '<h2', '<h3', '<h4', '<h5', '<h6',
'<menu', '<blockquote');
$block_elements_amount = count($block_elements);
/** Make sure that all the tags are in lowercase
* purge all \n from inside tags
* remove spaces in endtags
* @param string text
* @return text
*/
function tag_clean($textstring)
{
$text = str_split($textstring);
$tagstart = false;
$tagstart_b = false;
$tagdef = false;
$endtag = false;
$tagstyle = false;
for($i=0;$i<$block_elements_amount;$i++) {
$start_tag = $block_elements[$i];
//first see if block-element really exists
$tag_position = strpos($textstring, $start_tag);
if ($tag_position === false) {
continue;
} else {
$end_tag = $this->end_tags($start_tag);
$textstring = str_replace("$start_tag", "</p>$start_tag", $textstring);
$textstring = str_replace("$end_tag", "$end_tag<p>", $textstring);
}
}
return $textstring;
}
for ($i = 0; $i < count($text); $i++)
{
if ($text[$i] == '<' && !strpos($textstring,'>',$i+1) )
{ $text[$i] = '&lt;'; }
elseif ($text[$i] == '>' && !($tagstart !== false || $tagdef || $tagstyle) )
{ $text[$i] = '&gt;'; }
elseif ($text[$i] == '<')
{ $tagstart = $i; }
elseif ($text[$i] == ' ' && $tagstart !== false )
{ $text[$i] = ''; }
elseif ($text[$i] == '>' && $tagstart !== false )
{
$text[$tagstart] = '&lt;';
$text[$i] = '&gt;';
}
elseif ($text[$i] == ' ' && $tagdef)
{
//check if it is a real tag
$tag = substr($textstring,$tagdef,$i-$tagdef);
if ( !(in_array($tag,$this->block_elements)
|| in_array($tag,$this->singleton_elements)
|| in_array($tag,$this->inline_elements)
|| in_array($tag,$this->allowed_p_parents)
|| in_array($tag,$this->isolationtags)
|| in_array($tag,$this->nested_block_elements) ))
{
$text[$tagstart_b] = '&lt;';
$text[strpos($textstring,'>',$i+1)] = '&gt;';
} else
{
$tagstyle = true;
$tagdef = false;
}
}
elseif ($text[$i] == '/' && $tagstart !== false)
{ $endtag = true; }
elseif ($text[$i] == ' ' && $endtag)
{ $text[$i] = ''; }
elseif (($tagstart !== false || $tagdef || $tagstyle) && $text[$i] == "\n")
{ $text[$i] = ''; }
elseif ($text[$i] == '>' && ($tagdef || $tagstyle) )
{
$tagstart = false;
$tagdef = false;
$tagstyle = false;
$endtag = false;
}
elseif ($tagstart !== false)
{
$tagdef = $i;
$tagstart_b = $tagstart;
$tagstart = false;
$text[$i] = strtolower($text[$i]);
}
elseif ($tagdef)
{ $text[$i] = strtolower($text[$i]); }
}
return implode($text);
}
/**
* Remove all <p>-tags from block-elements
* Note: Walking from left to right
* @param array text
* @return string
*/
function tidy_block_elements($text)
{
$remove = false;
$textstring = implode($text);
$block_elements = array('<table','<ul','<ol','<pre', '<dir', '<dl',
'<h1', '<h2', '<h3', '<h4', '<h5', '<h6',
'<menu', '<blockquote');
foreach ($block_elements as $start_tag) {
$end_tag = $this->end_tags($start_tag);
//first see if block-element really exists
$start_tag_position = strpos($textstring, $start_tag);
while ($start_tag_position !== false) {
$start_tag_end = strpos($textstring, '>', $start_tag_position)+1;
$blocktext = $this->get_string_till($textstring, $end_tag, $start_tag_end);
$blocktext_length = strlen($blocktext);
$formatted_blocktext = $this->purge_p($blocktext);
//insert formatted_blocktext into old blockelement
$textstring = substr_replace($textstring, $formatted_blocktext, $start_tag_end, $blocktext_length);
/*
* sophisticated nl to p - blocktag stage
* handles content with blocktags, apply nl2p to the block elements if tag allows it
* also works for omitted closing tags and singleton tags
* Insert P_BOTTOM class at paragraphs ending with two newlines
* Insert P_BREAK class at paragraphs ending with one newline
* Insert P_TOP class at the first paragraph if starting with a nl
* Insert P_TOP_BOTTOM class if the first paragraph is ending with two newlines
* @param: text
* return string
*/
function blocktag_nl2p($text)
{
//explode string into array of tags and contents
$textarray = $this->explode_along_tags($text);
$content = "";
$start = 0;
$tagstack = array();
$isolation_flag = false;
//next blockelement
$start_tag_position = strpos($textstring, $start_tag, $start_tag_end+strlen($formatted_blocktext));
}
}
return $textstring;
}
for ($i=0; $i < count($textarray); $i++)
{
/**
 * Return the part of $text between $offset and the next occurrence of
 * $end_tag (the end tag itself is not included).
 * @param string text
 * @param string end_tag  marker to search for
 * @param int offset      position where the search and the result start
 * @return string         empty string when the marker is not found
 */
function get_string_till($text, $end_tag, $offset=0)
{
    $marker_position = strpos($text, $end_tag, $offset);

    if ($marker_position === false) {
        return "";
    }

    return substr($text, $offset, $marker_position - $offset);
}
//get tag or false if none
$tag = $this->extract_tag($textarray[$i]);
/**
 * Build the closing counterpart of an (unterminated) start tag,
 * e.g. "<p" becomes "</p>".
 * Every "<" in the argument is rewritten to "</" and a ">" is appended.
 * @param string start_tag
 * @return string
 */
function end_tags($start_tag)
{
    $closing_form = str_replace("<", "</", $start_tag);

    return $closing_form . ">";
}
//new blocktag - e.g. <table>
if ($tag && $this->is_starttag($textarray[$i])
&& (in_array($tag, $this->block_elements) || in_array($tag, $this->nested_block_elements) ))
{
//merge previous content, apply nl2p if needed and concatenate
if (!$isolation_flag && ( empty($tagstack) || in_array($tagstack[0], $this->allowed_p_parents) ))
{
$content .= $this->nl2pblock(implode(array_slice($textarray,$start,$i-$start)));
} else
{
$content .= implode(array_slice($textarray,$start,$i-$start));
}
// clear stack of block elements and insert
if (in_array($tag, $this->block_elements) )
{
$tagstack = array_diff($tagstack, $this->block_elements);
}
// concatenate tag
$content .= $textarray[$i] . "\n";
if (!in_array($tag, $this->singleton_elements) )
{
array_unshift($tagstack, $tag);
}
$start = $i+1;
}
//new tag which can contain paragraphs and can be inside a blocktag - e.g. <td>
elseif ($tag && $this->is_starttag($textarray[$i]) && in_array($tag, $this->allowed_p_parents))
{
//merge previous content, apply nl2p if needed and concatenate
if (!$isolation_flag && ( empty($tagstack) || in_array($tagstack[0], $this->allowed_p_parents) ))
{
$content .= $this->nl2pblock(implode(array_slice($textarray,$start,$i-$start)));
} else
{
$content .= implode(array_slice($textarray,$start,$i-$start));
}
//insert tag into the stack and concatenate
array_unshift($tagstack, $tag);
$content .= $textarray[$i];
$start = $i+1;
}
//isolation tag
elseif($tag && $this->is_starttag($textarray[$i]) && in_array($tag, $this->isolationtags) )
{
//merge previous content, apply nl2p if needed and concatenate
if (!$isolation_flag && empty($tagstack) )
{
$content .= $this->nl2pblock(implode(array_slice($textarray,$start,$i-$start)));
} elseif (!$isolation_flag && in_array($tagstack[0], $this->allowed_p_parents) )
{
$content .= $textarray[$start]
. $this->nl2pblock(implode(array_slice($textarray,$start+1,$i-$start-1)));
} else
{
$content .= implode(array_slice($textarray,$start,$i-$start));
}
$isolation_flag = true;
$start = $i+1;
}
//closing isolation tag
elseif($tag && !$this->is_starttag($textarray[$i]) && in_array($tag, $this->isolationtags) )
{
if ($isolation_flag)
{
//content, no nl2p
$content .= implode(array_slice($textarray,$start,$i-$start-1));
$isolation_flag = false;
$start = $i+1;
}
}
//closing blocktag or p parent - e.g. </table> or </td>
elseif($tag && !$this->is_starttag($textarray[$i]) && !empty($tagstack) && $tag == $tagstack[0])
{
//content, apply nl2p if needed
if ($i != $start)
{
if (!$isolation_flag && in_array($tagstack[0], $this->allowed_p_parents) )
{
$content .= $this->nl2pblock(implode(array_slice($textarray,$start,$i-$start)));
} else
{
$content .= implode(array_slice($textarray,$start,$i-$start));
}
}
//closing tag
$content .= $textarray[$i] . "\n";
$start = $i+1;
array_shift($tagstack);
}
}
//merge remainder
if (!$isolation_flag && ( empty($tagstack) || in_array($tagstack[0], $this->allowed_p_parents) ))
{
$content .= $this->nl2pblock(implode(array_slice($textarray,$start,$i-$start)));
} else
{
$content .= implode(array_slice($textarray,$start,$i-$start));
}
return $content;
}
/*
 * Turn newline-separated text into a sequence of <p> paragraphs.
 * Intended for content from which blocktag_nl2p has already split off the block elements.
 *
 * - the text is split at every \n and every line becomes one paragraph;
 *   an empty line is skipped unless it is itself followed by another empty line
 * - paragraph class:
 *     P_TOP_BOTTOM  first line, text started with \n, and the line is followed
 *                   by an empty line (or is the last line and the text ended with \n)
 *     P_TOP         first line and the text started with \n
 *     P_BOTTOM      line followed by an empty line (or last line and text ended with \n)
 *     P_BREAK       everything else
 * - (inline) tags still open at the end of a line are closed before the closing p tag
 *   and reopened at the start of the next paragraph
 *
 * Blank checks use a comparison with '' instead of empty(), because empty("0")
 * is true and a line consisting only of "0" would otherwise be dropped.
 *
 * @param string text
 * @return string  the input unchanged when it contains nothing but whitespace
 */
function nl2pblock($textstring)
{
    //nothing but whitespace: hand it back untouched
    if (trim($textstring) === '') { return $textstring; }

    //remember leading/trailing newlines, they select the paragraph class
    $startnl = ( strspn($textstring,"\n") ) ? true : false;
    $endnl = ( strspn($textstring,"\n",-1 ) ) ? true : false;

    $textstring = trim($textstring,"\n");
    if ($textstring === '') { return ''; }

    //explode in paragraphs
    $textarray = explode("\n",$textstring);
    $linecount = count($textarray);
    $tagstack = array();
    $content = '';

    for ($i = 0; $i < $linecount; $i++)
    {
        $is_last = ($i == $linecount - 1);

        //whiteline: the current line is followed by an empty line
        $whiteline = (!$is_last && $textarray[$i+1] === '');

        //an empty line that is not followed by another empty line produces no paragraph
        if (!$whiteline && $textarray[$i] === '')
        {
            continue;
        }

        //explode in tags and content
        $textline = $this->explode_along_tags($textarray[$i]);

        //tags that were still open when the previous paragraph ended
        $tagstack_prev = $tagstack;

        //iterate through the tags of this line and keep the stack of open tags up to date
        for ($j = 0; $j < count($textline); $j++)
        {
            //get tag or false if none
            $tag = $this->extract_tag($textline[$j]);

            if ($tag && $this->is_starttag($textline[$j]) && !in_array($tag,$this->singleton_elements) )
            {
                //the complete start tag is stored so it can be reopened with its attributes
                array_unshift($tagstack, $textline[$j]);
            }
            elseif ($tag && !$this->is_starttag($textline[$j]) && !empty($tagstack) && $tag == $this->extract_tag($tagstack[0]))
            {
                array_shift($tagstack);
            }
        }

        //paragraph class
        $bottom = ($whiteline || ($is_last && $endnl));
        if ($i == 0 && $startnl && $bottom)
        {
            $content .= self::P_TOP_BOTTOM;
        }
        elseif ($i == 0 && $startnl)
        {
            $content .= self::P_TOP;
        }
        elseif ($bottom)
        {
            $content .= self::P_BOTTOM;
        }
        else
        {
            $content .= self::P_BREAK;
        }

        //reopen active tags
        foreach ($tagstack_prev as $ins_tag)
        {
            $content .= $ins_tag;
        }

        //content paragraph
        $content .= $textarray[$i];

        //close open tags
        foreach ($tagstack as $ins_tag)
        {
            $content .= $this->html_end_tag($this->extract_tag($ins_tag));
        }

        //paragraph closing tag
        $content .= self::P_END . "\n";
    }

    return $content;
}
/**
 * Split a text into an ordered list of pieces, where every piece is either
 * one complete tag ("<...>") or the plain content between two tags.
 * @param string text
 * @return mixed  array of tags and contents (empty array for an empty text),
 *                or false when a "<" has no matching ">" or the tag is just "<>"
 */
function explode_along_tags($text)
{
    $pieces = array();
    $cursor = 0;

    while (true)
    {
        $open = strpos($text, '<', $cursor);

        if ($open === false)
        {
            //no more tags: whatever is left over is plain content
            if (strlen($text) - $cursor > 0)
            {
                $pieces[] = substr($text, $cursor, strlen($text) - $cursor);
            }
            return $pieces;
        }

        //content in front of the tag
        if ($open - $cursor > 0)
        {
            $pieces[] = substr($text, $cursor, $open - $cursor);
        }

        $close = strpos($text, '>', $open);

        //unterminated tag, or nothing between the brackets
        if ($close === false || ($close - $open) <= 1)
        {
            return false;
        }

        //the tag itself, including both brackets
        $pieces[] = substr($text, $open, $close - $open + 1);
        $cursor = $close + 1;
    }
}
/**
 * Get the element name out of a tag string.
 * The name is everything up to the first blank or ">", with leading "<"
 * and "/" characters stripped, so start and end tags give the same name.
 * @param string text  one piece as produced by explode_along_tags
 * @return mixed       tag name, or false when the piece does not start with "<"
 */
function extract_tag($text)
{
    if ($text[0] == '<')
    {
        $name_end = strcspn($text, ' >');
        $head = substr($text, 0, $name_end);

        return ltrim($head, '</');
    }

    return false;
}
/**
 * Wrap a tag name into a closing tag: "b" becomes "</b>".
 */
function html_end_tag($text)
{
    return "</{$text}>";
}
/**
 * Wrap a tag name into an opening tag: "b" becomes "<b>".
 */
function html_start_tag($text)
{
    return "<{$text}>";
}
/**
 * Tell whether a tag string is a start tag, i.e. its second character is not "/".
 */
function is_starttag($text)
{
    return $text[1] != "/";
}
/**
 * Search backwards for the next newline that is separated from the
 * current position by text.
 * Starting one entry below $i, entries containing a newline are passed
 * over until the first entry without one is seen; after that the search
 * stops at the next entry that contains a newline. Index 0 is never inspected.
 * @param int start
 * @param array text
 * @return int  index of the newline entry found; otherwise the position where the
 *              search ran out (0, or start - 1 when start is not above 1)
 */
function next_nl_block($i, $text)
{
    $seen_text = false;
    $i--;

    while ($i > 0) {
        $has_newline = (strpos($text[$i], "\n") !== false);

        if ($seen_text && $has_newline) {
            //newline behind a stretch of text: this is the one we want
            break;
        }
        if (!$has_newline) {
            //passed over a non-newline, heading to the next block
            $seen_text = true;
        }
        $i--;
    }

    return $i;
}
}