* Mail: keep number & bullet lists when converting to plain text

This commit is contained in:
nathangray 2019-11-08 11:42:25 -07:00
parent d8faef3503
commit 5acd287e8d
2 changed files with 115 additions and 0 deletions

View File

@ -303,6 +303,13 @@ class Html
$_html = str_replace(array("\r\n","\n"),($isHTML?'':' '),$_html); $_html = str_replace(array("\r\n","\n"),($isHTML?'':' '),$_html);
} }
} }
// Handle lists
if(stripos($_html, '<li') !== false)
{
$_html = self::replaceLists($_html);
}
$tags = array ( $tags = array (
0 => '~<h[123][^>]*>\r*\n*~si', 0 => '~<h[123][^>]*>\r*\n*~si',
1 => '~<h[456][^>]*>\r*\n*~si', 1 => '~<h[456][^>]*>\r*\n*~si',
@ -471,6 +478,48 @@ class Html
} }
} }
/**
 * Replace HTML lists with a plain text equivalent
 *
 * Every <ul> / <ol> in the given markup is swapped for a text node holding
 * one line per list item: " * item" for unordered lists, " 1. item" (counting
 * from 1) for ordered lists.  Lines are terminated with CRLF and each list is
 * preceded by an extra CRLF.
 *
 * The result is the parser's serialisation of the modified document, so markup
 * outside the lists may come back slightly different from the input (bare text
 * gets wrapped in <p>, a trailing newline is appended).  Input that is empty or
 * contains no "<li" is returned untouched without being parsed.
 *
 * @param string $html
 *
 * @return string
 */
public static function replaceLists($html)
{
	if(!$html || stripos($html, '<li') === false)
	{
		return $html;
	}

	// Malformed markup is routed to libxml's internal error buffer instead of
	// being raised as PHP warnings; the previous setting is restored right after.
	$previous_error_mode = libxml_use_internal_errors(true);

	// loadHTML() is an instance method - calling it statically is an error on PHP 8.
	// NOTE(review): no charset hint is passed to the parser here; confirm how
	// callers expect non-ASCII content to be handled.
	$dom = new \DOMDocument();
	$loaded = $dom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_NOBLANKS);

	libxml_clear_errors();
	libxml_use_internal_errors($previous_error_mode);

	if(!$loaded)
	{
		// Failed to parse
		return $html;
	}
	$dom->normalizeDocument();

	// Take a snapshot of all lists first: the node lists returned by
	// getElementsByTagName() are live, replacing nodes while iterating over
	// them makes the iteration skip elements.
	$lists = array();
	foreach($dom->getElementsByTagName('*') as $node)
	{
		if($node->nodeName === 'ol' || $node->nodeName === 'ul')
		{
			$lists[] = $node;
		}
	}

	// Reverse document order handles nested lists before the list containing
	// them.  By the time an outer list is processed its inner lists are already
	// plain text, so their items are neither repeated nor counted twice.
	// NOTE(review): a list that is itself the document root has the document as
	// parent; confirm replacing it with a text node is acceptable there.
	foreach(array_reverse($lists) as $list)
	{
		$ordered = $list->nodeName === 'ol';
		$list_text = "\r\n";
		$item_count = 0;
		foreach($list->getElementsByTagName('li') as $element)
		{
			$marker = $ordered ? ' ' . ++$item_count . '. ' : ' * ';
			$list_text .= $marker . $element->textContent . "\r\n";
		}
		$list->parentNode->replaceChild($dom->createTextNode($list_text), $list);
	}

	return $dom->saveHTML();
}
/** /**
* split html by PRE tag, return array with all content pre-sections isolated in array elements * split html by PRE tag, return array with all content pre-sections isolated in array elements
* @author Leithoff, Klaus * @author Leithoff, Klaus

View File

@ -0,0 +1,66 @@
<?php
/**
* EGroupware Api: HTML handling tests
*
* @link http://egroupware.org
* @package api
* @subpackage mail
* @author Nathan Gray
* @license http://opensource.org/licenses/gpl-license.php GPL - GNU General Public License
*/
namespace EGroupware\Api\Mail;
use EGroupware\Api;
use PHPUnit\Framework\TestCase;
/**
 * Tests for HTML handling
 *
 * @author nathan
 */
class HtmlTest extends TestCase
{
	/**
	 * Test how HTML lists (ol & ul) get converted to a plain text equivalent
	 *
	 * @param string $html markup handed to Html::replaceLists()
	 * @param string $expected_text exact string the conversion has to return
	 *
	 * @dataProvider listDataProvider
	 */
	public function testListToText($html, $expected_text)
	{
		$replaced = Html::replaceLists($html);

		// Strict comparison: the result has to match byte for byte, including line endings
		$this->assertSame($expected_text, $replaced);
	}

	/**
	 * Data for checking HTML list conversion to plain text
	 *
	 * HTML first, then expected text.  Datasets are named so a failure tells
	 * which case broke.  The provider is static, as newer PHPUnit releases
	 * no longer accept non-static data providers.
	 *
	 * @return array
	 */
	public static function listDataProvider()
	{
		return array(
			// HTML
			// Plaintext
			'empty string' => ['', ''],
			'plain text' => ['Not actually HTML', 'Not actually HTML'],
			'html without list' => ['HTML, but <b>NO</b> list here', 'HTML, but <b>NO</b> list here'],
			'unordered list' => [
				"<p>Unordered list:<ul><li>First</li>\r\n<li>Second</li>\r\n<li>Third</li>\r\n</ul>\r\nPost text</p>",
				"<p>Unordered list:\r\n * First\r\n * Second\r\n * Third\r\n<p>\r\nPost text</p></p>\n"
			],
			'ordered list' => [
				"Ordered list:".
				"<ol><li>First</li>\r\n"
				. "<li>Second</li>\r\n"
				. "<li>Third</li>\r\n"
				. "</ol>Post text",
				"<p>Ordered list:\r\n"
				. " 1. First\r\n"
				. " 2. Second\r\n"
				. " 3. Third\r\n"
				. "<p>Post text</p></p>\n"
			],
		);
	}
}