Archived
1
0

NL2P operation completely rewritten; it now works from front to back according to tags

This commit is contained in:
Stephan Brunker
2017-12-12 22:57:57 +01:00
committed by onli
parent b837adde3c
commit 43b9f607d3
@@ -269,16 +269,23 @@ class serendipity_event_nl2br extends serendipity_event
$element = $temp['element']; $element = $temp['element'];
if ($p_tags) { if ($p_tags) {
// NL2P OPERATION
$this->isolationtags = $isolate;
$text = $eventData[$element]; $text = $eventData[$element];
if (!empty($text)) { if (!empty($text)) {
//Standardize line endings to Unix //Standardize line endings to Unix
$text = str_replace(array("\r\n", "\r"), "\n", $text); $text = str_replace(array("\r\n", "\r"), "\n", $text);
if ($element == 'body' && isset($eventData['extended'])) {
//move newlines from body to extended //move newlines from body to extended
if ($element == 'body' && isset($eventData['extended']))
{
$eventData['extended'] = str_repeat("\n",strspn($text,"\n",-1)) . $eventData['extended']; $eventData['extended'] = str_repeat("\n",strspn($text,"\n",-1)) . $eventData['extended'];
$text = rtrim($text,"\n");
} }
$eventData[$element] = $this->nl2p($text, $element); $eventData[$element] = $this->nl2p($text);
} }
// NL2BR OPERATION
} else if ($isolate) { } else if ($isolate) {
$eventData[$element] = $this->isolate($eventData[$element], '~[<\[](' . implode('|', $isolate) . ').*?[>\]].*?[<\[]/\1[>\]]~si'); $eventData[$element] = $this->isolate($eventData[$element], '~[<\[](' . implode('|', $isolate) . ').*?[>\]].*?[<\[]/\1[>\]]~si');
$eventData[$element] = nl2br($eventData[$element]); $eventData[$element] = nl2br($eventData[$element]);
@@ -325,17 +332,17 @@ class serendipity_event_nl2br extends serendipity_event
/* nl2br plugin start */ /* nl2br plugin start */
p.whitelinebottom { p.wl_bottom {
margin-top: 0em; margin-top: 0em;
margin-bottom: 1em; margin-bottom: 1em;
} }
p.whitelinetop { p.wl_top {
margin-top: 1em; margin-top: 1em;
margin-bottom: 0em; margin-bottom: 0em;
} }
p.whitelinetopbottom { p.wl_top_bottom {
margin-top: 1em; margin-top: 1em;
margin-bottom: 1em; margin-bottom: 1em;
} }
@@ -387,215 +394,427 @@ p.break {
} }
/** ====================================
* NL2P OPERATION
* ====================================
*/
/** // following w3.org, these elements close p elements automatically:
* Insert <p class="whiteline" at paragraphs ending with two newlines var $block_elements = array('table','ul','ol','pre', 'dl',
* Insert <p class="break" at paragraphs ending with one or no nl 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
* Insert <p class="whitelinetop" at the first paragraph if starting with a nl 'menu', 'section',
* Insert <p class="whitelinetopbottom" if the first paragraph is ending with two newlines 'address', 'article', 'aside', 'fieldset', 'footer',
* @param string text 'form', 'header', 'hgroup', 'hr', 'main', 'nav', 'p'
* @param boolean complex operations (not necessary when text is flat) );
* @return string
*/
function nl2p(&$text, $element='any', $complex=true)
{
//check if string starts with a newline (extended only)
$startnl = ($element =='extended' && strspn($text,"\n")) ? true : false;
//trim whitespaces and line breaks
$text = trim($text);
//split into array
$text = str_split($text);
$big_p = '<p class="whiteline">'; var $nested_block_elements = array('div','table','blockquote');
$small_p = '<p class="break">';
var $singleton_elements = array('area', 'br', 'col', 'command', 'embed', 'hr',
'img', 'input', 'keygen', 'link', 'param', 'source',
'track', 'wbr', '!--'
);
$i = count($text) - 1; var $allowed_p_parents = array('blockquote', 'td', 'div', 'article', 'aside', 'dd',
$whiteline = false; 'details', 'dl', 'dt', 'footer', 'header', 'summary'
);
//main operation: convert \n to big_p and small_p
while ($i > 0) {
//search next /n enclosing text, starting at $i-1
$i = $this->next_nl_block($i, $text);
if ($i == 0) { //no newlines left
break;
} elseif ($whiteline == true) {
$text[$i] = '</p>' . $big_p;
} else {
$text[$i] = '</p>' . $small_p;
}
//look ahead for next paragraph class
if ($text[$i-1] === "\n") {
$whiteline = true;
$i--;
} else {
$whiteline = false;
}
}
if ($whiteline && $startnl) {
$start_tag = '<p class="whitelinetopbottom">';
} elseif ($startnl) {
$start_tag = '<p class="whitelinetop">';
} elseif ($whiteline) {
$start_tag = $big_p;
} else {
$start_tag = $small_p;
}
if ($complex) {
$textstring = $this->tidy_block_elements($text);
$textstring = $this->formate_block_elements($textstring);
$textstring = $this->isolate_block_elements($textstring);
$textstring = $start_tag . $textstring . '</p>';
return $this->clean_code($textstring);
}
return $start_tag . implode($text) . '</p>';
}
/** //paragraphs aren't allowed in these inline elements -> p closes these elements:
* Remove unnecessary paragraphs var $inline_elements = array('b', 'big', 'i', 'small', 'tt', 'abbr',
* Unnecessary are those which start and end immediately. 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong',
* They only get created by isolate_block_elements 'samp', 'var', 'a', 'bdo', 'br', 'map', 'object',
* @param mixed text 'q', 'script', 'span', 'sub', 'sup', 'button',
* @return string 'label', 'select', 'textarea'
*/ );
function clean_code ($text)
{
if (is_array($text)) {
$text = implode($text);
}
return str_replace(array('<p class="whiteline"></p>','<p class="break"></p>','<p class="whitelinetop"></p>','<p class="whitelinetopbottom"></p>','<p></p>'),"", $text);
}
function purge_p($text) const P_TOP = '<p class="wl_top">';
{ const P_BOTTOM = '<p class="wl_bottom">';
$text = str_replace('</p>', "", $text); const P_TOP_BOTTOM = '<p class="wl_top_bottom">';
return str_replace(array('<p class="whiteline">','<p class="break">', '<p>', '</p>'),"\n", $text); const P_BREAK = '<p class="break">';
} const P_END = '</p>';
/** function nl2p($text)
* Use nl2p on text within blockelements, useful e.g. with blockquotes {
* @param array text //homogenize tags
* @return string $text = $this->tag_clean($text);
*/
function formate_block_elements($textstring) //delete isolation tags from other arrays
{ if ($this->isolationtags)
$block_elements = array('<blockquote'); {
foreach ($block_elements as $start_tag) { $this->block_elements = array_diff($this->block_elements,$this->isolationtags);
$end_tag = $this->end_tags($start_tag); $this->allowed_p_parents = array_diff($this->allowed_p_parents,$this->isolationtags);
//first see if block-element really exists $this->nested_block_elements = array_diff($this->nested_block_elements,$this->isolationtags);
$start_tag_position = strpos($textstring, $start_tag); $this->inline_elements = array_diff($this->inline_elements,$this->isolationtags);
while ($start_tag_position !== false) { $this->singleton_elements = array_diff($this->singleton_elements,$this->isolationtags);
$start_tag_end = strpos($textstring, '>', $start_tag_position)+1; }
$blocktext = $this->get_string_till($textstring, $end_tag, $start_tag_end); else { $this->isolationtags = array(); }
$blocktext_length = strlen($blocktext);
$formatted_blocktext = $this->nl2p($blocktext);
//insert formatted_blocktext into old blockelement
$textstring = substr_replace($textstring, $formatted_blocktext, $start_tag_end, $blocktext_length);
//next blockelement return $this->blocktag_nl2p($text);
$start_tag_position = strpos($textstring, $start_tag, $start_tag_end+strlen($formatted_blocktext)); }
}
}
return $textstring;
}
/** /** Make sure that all the tags are in lowercase
* Make sure none of these block_elements are within a <p> * purge all \n from inside tags
* @param string text * remove spaces in endtags
* @return string * @param string text
*/ * @return text
function isolate_block_elements($textstring) */
{ function tag_clean($textstring)
$block_elements = array('<table','<ul','<ol','<pre', '<dir', '<dl', {
'<h1', '<h2', '<h3', '<h4', '<h5', '<h6', $text = str_split($textstring);
'<menu', '<blockquote'); $tagstart = false;
$block_elements_amount = count($block_elements); $tagstart_b = false;
$tagdef = false;
$endtag = false;
$tagstyle = false;
for($i=0;$i<$block_elements_amount;$i++) { for ($i = 0; $i < count($text); $i++)
$start_tag = $block_elements[$i]; {
//first see if block-element really exists if ($text[$i] == '<' && !strpos($textstring,'>',$i+1) )
$tag_position = strpos($textstring, $start_tag); { $text[$i] = '&lt;'; }
if ($tag_position === false) { elseif ($text[$i] == '>' && !($tagstart !== false || $tagdef || $tagstyle) )
continue; { $text[$i] = '&gt;'; }
} else { elseif ($text[$i] == '<')
$end_tag = $this->end_tags($start_tag); { $tagstart = $i; }
$textstring = str_replace("$start_tag", "</p>$start_tag", $textstring); elseif ($text[$i] == ' ' && $tagstart !== false )
$textstring = str_replace("$end_tag", "$end_tag<p>", $textstring); { $text[$i] = ''; }
} elseif ($text[$i] == '>' && $tagstart !== false )
} {
return $textstring; $text[$tagstart] = '&lt;';
} $text[$i] = '&gt;';
}
elseif ($text[$i] == ' ' && $tagdef)
{
//check if it is a real tag
$tag = substr($textstring,$tagdef,$i-$tagdef);
if ( !(in_array($tag,$this->block_elements)
|| in_array($tag,$this->singleton_elements)
|| in_array($tag,$this->inline_elements)
|| in_array($tag,$this->allowed_p_parents)
|| in_array($tag,$this->isolationtags)
|| in_array($tag,$this->nested_block_elements) ))
{
$text[$tagstart_b] = '&lt;';
$text[strpos($textstring,'>',$i+1)] = '&gt;';
} else
{
$tagstyle = true;
$tagdef = false;
}
}
elseif ($text[$i] == '/' && $tagstart !== false)
{ $endtag = true; }
elseif ($text[$i] == ' ' && $endtag)
{ $text[$i] = ''; }
elseif (($tagstart !== false || $tagdef || $tagstyle) && $text[$i] == "\n")
{ $text[$i] = ''; }
elseif ($text[$i] == '>' && ($tagdef || $tagstyle) )
{
$tagstart = false;
$tagdef = false;
$tagstyle = false;
$endtag = false;
}
elseif ($tagstart !== false)
{
$tagdef = $i;
$tagstart_b = $tagstart;
$tagstart = false;
$text[$i] = strtolower($text[$i]);
}
elseif ($tagdef)
{ $text[$i] = strtolower($text[$i]); }
}
return implode($text);
}
/** /*
* Remove all <p>-tags from block-elements * sophisticated nl to p - blocktag stage
* Note: Walking from left to right * handles content with blocktags, apply nl2p to the block elements if tag allows it
* @param array text * works also for ommitted closing tags and singleton tags
* @return string * Insert P_BOTTOM class at paragraphs ending with two newlines
*/ * Insert P_BREAK class at paragraphs ending with one newline
function tidy_block_elements($text) * Insert P_TOP class at the first paragraph if starting with a nl
{ * Insert P_TOP_BOTTOM class if the first paragraph is ending with two newlines
$remove = false; * @param: text
$textstring = implode($text); * return string
$block_elements = array('<table','<ul','<ol','<pre', '<dir', '<dl', */
'<h1', '<h2', '<h3', '<h4', '<h5', '<h6', function blocktag_nl2p($text)
'<menu', '<blockquote'); {
foreach ($block_elements as $start_tag) { //explode string into array of tags and contents
$end_tag = $this->end_tags($start_tag); $textarray = $this->explode_along_tags($text);
//first see if block-element really exists $content = "";
$start_tag_position = strpos($textstring, $start_tag); $start = 0;
while ($start_tag_position !== false) { $tagstack = array();
$start_tag_end = strpos($textstring, '>', $start_tag_position)+1; $isolation_flag = false;
$blocktext = $this->get_string_till($textstring, $end_tag, $start_tag_end);
$blocktext_length = strlen($blocktext);
$formatted_blocktext = $this->purge_p($blocktext);
//insert formatted_blocktext into old blockelement
$textstring = substr_replace($textstring, $formatted_blocktext, $start_tag_end, $blocktext_length);
//next blockelement for ($i=0; $i < count($textarray); $i++)
$start_tag_position = strpos($textstring, $start_tag, $start_tag_end+strlen($formatted_blocktext)); {
}
}
return $textstring;
}
function get_string_till($text, $end_tag, $offset=0) //get tag or false if none
{ $tag = $this->extract_tag($textarray[$i]);
if (strpos($text, $end_tag, $offset) === false) {
return "";
}
$len = strpos($text, $end_tag, $offset) - $offset;
return substr($text, $offset, $len);
}
/** //new blocktag - e.g. <table>
* Return corresponding end-tag: <p -> </p> if ($tag && $this->is_starttag($textarray[$i])
*/ && (in_array($tag, $this->block_elements) || in_array($tag, $this->nested_block_elements) ))
function end_tags($start_tag) {
{ //merge previous content, apply nl2p if needed and concatenate
return str_replace("<", "</", $start_tag).">"; if (!$isolation_flag && ( empty($tagstack) || in_array($tagstack[0], $this->allowed_p_parents) ))
} {
$content .= $this->nl2pblock(implode(array_slice($textarray,$start,$i-$start)));
} else
{
$content .= implode(array_slice($textarray,$start,$i-$start));
}
// clear stack of block elements and insert
if (in_array($tag, $this->block_elements) )
{
$tagstack = array_diff($tagstack, $this->block_elements);
}
// concatenate tag
$content .= $textarray[$i] . "\n";
if (!in_array($tag, $this->singleton_elements) )
{
array_unshift($tagstack, $tag);
}
$start = $i+1;
}
//new tag which can contain paragraphs and can be inside a blocktag - e.g. <td>
elseif ($tag && $this->is_starttag($textarray[$i]) && in_array($tag, $this->allowed_p_parents))
{
//merge previous content, apply nl2p if needed and concatenate
if (!$isolation_flag && ( empty($tagstack) || in_array($tagstack[0], $this->allowed_p_parents) ))
{
$content .= $this->nl2pblock(implode(array_slice($textarray,$start,$i-$start)));
} else
{
$content .= implode(array_slice($textarray,$start,$i-$start));
}
//insert tag into the stack and concatenate
array_unshift($tagstack, $tag);
$content .= $textarray[$i];
$start = $i+1;
}
//isolation tag
elseif($tag && $this->is_starttag($textarray[$i]) && in_array($tag, $this->isolationtags) )
{
//merge previous content, apply nl2p if needed and concatenate
if (!$isolation_flag && empty($tagstack) )
{
$content .= $this->nl2pblock(implode(array_slice($textarray,$start,$i-$start)));
} elseif (!$isolation_flag && in_array($tagstack[0], $this->allowed_p_parents) )
{
$content .= $textarray[$start]
. $this->nl2pblock(implode(array_slice($textarray,$start+1,$i-$start-1)));
} else
{
$content .= implode(array_slice($textarray,$start,$i-$start));
}
$isolation_flag = true;
$start = $i+1;
}
//closing isolation tag
elseif($tag && !$this->is_starttag($textarray[$i]) && in_array($tag, $this->isolationtags) )
{
if ($isolation_flag)
{
//content, no nl2p
$content .= implode(array_slice($textarray,$start,$i-$start-1));
$isolation_flag = false;
$start = $i+1;
}
}
//closing blocktag or p parent - e.g. </table> or </td>
elseif($tag && !$this->is_starttag($textarray[$i]) && !empty($tagstack) && $tag == $tagstack[0])
{
//content, apply nl2p if needed
if ($i != $start)
{
if (!$isolation_flag && in_array($tagstack[0], $this->allowed_p_parents) )
{
$content .= $this->nl2pblock(implode(array_slice($textarray,$start,$i-$start)));
} else
{
$content .= implode(array_slice($textarray,$start,$i-$start));
}
}
//closing tag
$content .= $textarray[$i] . "\n";
$start = $i+1;
array_shift($tagstack);
}
}
//merge remainder
if (!$isolation_flag && ( empty($tagstack) || in_array($tagstack[0], $this->allowed_p_parents) ))
{
$content .= $this->nl2pblock(implode(array_slice($textarray,$start,$i-$start)));
} else
{
$content .= implode(array_slice($textarray,$start,$i-$start));
}
return $content;
}
/**
 * Sophisticated nl to p for content which is already purged from block
 * elements by blocktag_nl2p.
 *
 * The text is split along "\n". Every line becomes one paragraph built as:
 *   p class | reopened active tags | line content | closing tags | P_END "\n"
 * Inline tags that are still open at the end of a line are closed before
 * P_END and reopened at the start of the following paragraph.
 *
 * Paragraph class selection:
 *  - P_TOP_BOTTOM: first line, text started with "\n", and the line is
 *    followed by an empty line (or is the last line and the text ended
 *    with "\n")
 *  - P_TOP:        first line and the text started with "\n"
 *  - P_BOTTOM:     line is followed by an empty line (or is the last line
 *    and the text ended with "\n")
 *  - P_BREAK:      everything else
 *
 * @param string $textstring text without block elements
 * @return string text wrapped in paragraphs; whitespace-only input is
 *                returned unchanged
 */
function nl2pblock($textstring)
{
    // nothing but whitespace: leave untouched.
    // Compare against '' instead of using empty(), because empty('0') is
    // true and would silently swallow a text consisting of "0".
    if (trim($textstring) === '')
    {
        return $textstring;
    }

    // remember leading/trailing newlines, they select the paragraph class
    $startnl = strspn($textstring, "\n") ? true : false;
    $endnl = strspn($textstring, "\n", -1) ? true : false;
    $whiteline = false;
    $textstring = trim($textstring, "\n");

    // explode in paragraphs
    $textarray = explode("\n", $textstring);
    $linecount = count($textarray);
    $tagstack = array();
    $content = '';

    for ($i = 0; $i < $linecount; $i++)
    {
        // check for whiteline (an empty line following this one)
        if ($i < $linecount - 1 && $textarray[$i+1] === '')
        {
            $whiteline = true;
        }
        elseif ($textarray[$i] === '')
        {
            // the empty line itself was already accounted for
            continue;
        }

        // explode in tags and content; explode_along_tags returns false for
        // malformed tags, in which case the line is handled as tagless text
        $textline = $this->explode_along_tags($textarray[$i]);
        if (!is_array($textline))
        {
            $textline = array();
        }

        // save tags that were active before this line
        $tagstack_prev = $tagstack;

        // iterate through the tags in the paragraph, newest tag is kept at index 0
        foreach ($textline as $piece)
        {
            // tag name or false if $piece is plain content
            $tag = $this->extract_tag($piece);
            if (!$tag)
            {
                continue;
            }
            if ($this->is_starttag($piece))
            {
                if (!in_array($tag, $this->singleton_elements))
                {
                    array_unshift($tagstack, $piece);
                }
            }
            elseif (!empty($tagstack) && $tag == $this->extract_tag($tagstack[0]))
            {
                array_shift($tagstack);
            }
        }

        // paragraph class
        $bottom = $whiteline || ($i == $linecount - 1 && $endnl);
        if ($i == 0 && $startnl && $bottom)
        {
            $content .= self::P_TOP_BOTTOM;
        }
        elseif ($i == 0 && $startnl)
        {
            $content .= self::P_TOP;
        }
        elseif ($bottom)
        {
            $content .= self::P_BOTTOM;
        }
        else
        {
            $content .= self::P_BREAK;
        }

        // reopen active tags, outermost first: the stack holds the innermost
        // tag at index 0, so it has to be walked in reverse to keep nesting valid
        foreach (array_reverse($tagstack_prev) as $ins_tag)
        {
            $content .= $ins_tag;
        }

        // content paragraph
        $content .= $textarray[$i];

        // close tags that are still open, innermost first
        foreach ($tagstack as $ins_tag)
        {
            $content .= $this->html_end_tag($this->extract_tag($ins_tag));
        }

        // paragraph closing tag
        $content .= self::P_END . "\n";
        $whiteline = false;
    }
    return $content;
}
/**
 * Split a string into an ordered list of tag and content pieces.
 *
 * Every "<...>" sequence becomes its own element, the text between two
 * tags becomes an element as well. Empty content pieces are not stored.
 *
 * @param string $text
 * @return array|false list of tags and contents, or false if a "<" has no
 *                     closing ">" or the tag is empty ("<>")
 */
function explode_along_tags($text)
{
    $pieces = array();
    $cursor = 0;

    while (true)
    {
        $open = strpos($text, '<', $cursor);

        if ($open === false)
        {
            // no more tags: keep the remainder, if there is any
            $restlength = strlen($text) - $cursor;
            if ($restlength > 0)
            {
                $pieces[] = substr($text, $cursor, $restlength);
            }
            return $pieces;
        }

        // text in front of the tag
        if ($open > $cursor)
        {
            $pieces[] = substr($text, $cursor, $open - $cursor);
        }

        // the tag itself needs a closing bracket and at least one character inside
        $close = strpos($text, '>', $open);
        if ($close === false || ($close - $open) <= 1)
        {
            return false;
        }

        $pieces[] = substr($text, $open, $close - $open + 1);
        $cursor = $close + 1;
    }
}
/**
 * Get the tag name of a piece produced by explode_along_tags.
 *
 * Works for start tags ("<div class=x>"), end tags ("</div>") and
 * self-closing tags ("<br/>", "<br />"); the name ends at the first
 * whitespace, "/" or ">" so that "<br/>" yields "br" and not "br/".
 *
 * @param string $text tag or content piece
 * @return string|false tag name, or false if $text does not start with "<"
 */
function extract_tag($text)
{
    // plain content (or nothing at all) carries no tag
    if (!is_string($text) || $text === '' || $text[0] !== '<')
    {
        return false;
    }
    // drop the opening bracket and the slash of an end tag
    $name = ltrim($text, '</');
    return (string) substr($name, 0, strcspn($name, " \t\n/>"));
}
/**
 * Build the closing tag for a tag name.
 *
 * @param string $text tag name, e.g. "b"
 * @return string e.g. "</b>"
 */
function html_end_tag($text)
{
    return sprintf('</%s>', $text);
}
/**
 * Build the opening tag (without attributes) for a tag name.
 *
 * @param string $text tag name, e.g. "b"
 * @return string e.g. "<b>"
 */
function html_start_tag($text)
{
    return sprintf('<%s>', $text);
}
/**
 * Tell whether a tag piece is a start tag.
 *
 * Only the second character is inspected: a "/" there marks an end tag.
 *
 * @param string $text tag piece, e.g. "<b>" or "</b>"
 * @return boolean false for end tags, true otherwise
 */
function is_starttag($text)
{
    return $text[1] != "/";
}
/**
 * Walk backwards through $text and find the next newline that is separated
 * from the current position by at least one non-newline element.
 *
 * @param int $i index to start from (the search begins at $i - 1)
 * @param array $text array of string pieces
 * @return int index of the element containing the newline; 0 if none was
 *             found before reaching the start ($i - 1 if the loop never ran)
 */
function next_nl_block($i, $text)
{
    $position = $i - 1;
    $passed_text = false;

    while ($position > 0) {
        $has_newline = (strpos($text[$position], "\n") !== false);

        // a newline only counts once some text has been passed over
        if ($passed_text && $has_newline) {
            return $position;
        }
        if (!$has_newline) {
            $passed_text = true;
        }
        $position--;
    }
    return $position;
}
} }