Taming tidy: fix extra line breaks and set the encoding explicitly instead of letting tidy detect it.

This commit is contained in:
Nathan Gray 2011-09-07 15:51:33 +00:00
parent 434081d428
commit 3bdbeef5d6
3 changed files with 56 additions and 8 deletions

View File

@ -64,6 +64,12 @@ abstract class bo_merge
'clean' => true, 'clean' => true,
'output-xhtml' => true, 'output-xhtml' => true,
'show-body-only' => true, 'show-body-only' => true,
'output-encoding' => 'utf-8',
'input-encoding' => 'utf-8',
'quote-ampersand' => false, // Prevent double encoding
'quote-nbsp' => true, // XSLT can handle spaces easier
'preserve-entities' => true,
'wrap' => 0, // Wrapping can break output
); );
/** /**
@ -734,9 +740,19 @@ abstract class bo_merge
if (is_string($value) && (strpos($value,'<') !== false)) if (is_string($value) && (strpos($value,'<') !== false))
{ {
// Clean HTML, if it's being kept // Clean HTML, if it's being kept
if($replace_tags && extension_loaded('tidy')) if($replace_tags && extension_loaded('tidy')) {
{ $tidy = new tidy();
$value = tidy_repair_string($value, self::$tidy_config, 'utf8'); $cleaned = $tidy->repairString($value, self::$tidy_config);
// Found errors. Strip it all so there's some output
if($tidy->getStatus() == 2)
{
error_log($tidy->errorBuffer);
$value = strip_tags($value);
}
else
{
$value = $cleaned;
}
} }
// replace </p> and <br /> with CRLF (remove <p> and CRLF) // replace </p> and <br /> with CRLF (remove <p> and CRLF)
$value = str_replace(array("\r","\n",'<p>','</p>','<br />'),array('','','',"\r\n","\r\n"),$value); $value = str_replace(array("\r","\n",'<p>','</p>','<br />'),array('','','',"\r\n","\r\n"),$value);

View File

@ -86,7 +86,11 @@ Breakers
</xsl:template> </xsl:template>
--> -->
<xsl:template name="apply-styles" match="w:r[descendant::strong|descendant::em|descendant::u|descendant::span]"> <xsl:template match="w:r[descendant::strong|descendant::em|descendant::u|descendant::span]">
<xsl:call-template name="apply-styles"/>
</xsl:template>
<xsl:template name="apply-styles">
<xsl:for-each select="node()|@*[not(w:rPr)]"> <xsl:for-each select="node()|@*[not(w:rPr)]">
<xsl:choose> <xsl:choose>
<xsl:when test="descendant::strong|descendant::em|descendant::u|descendant::span" > <xsl:when test="descendant::strong|descendant::em|descendant::u|descendant::span" >
@ -210,9 +214,23 @@ Breakers
<w:t>&#xB7;</w:t><w:tab/> <w:t>&#xB7;</w:t><w:tab/>
</xsl:otherwise> </xsl:otherwise>
</xsl:choose> </xsl:choose>
</w:r><w:r>
<w:t><xsl:value-of select="normalize-space(text())" /></w:t>
</w:r> </w:r>
<xsl:choose>
<xsl:when test="count(child::*)=0">
<xsl:variable name="text">
<xsl:value-of select="substring-after(text(),' ')"/>
</xsl:variable>
<w:r><w:t>
<xsl:value-of select="normalize-space($text)"/>
</w:t></w:r>
</xsl:when>
<xsl:otherwise>
<!-- Strip out styles, they would need to be processed-->
<w:r><w:t>
<xsl:value-of select = "normalize-space(child::*)"/>
</w:t></w:r>
</xsl:otherwise>
</xsl:choose>
</w:p> </w:p>
</xsl:for-each> </xsl:for-each>
</xsl:template> </xsl:template>

View File

@ -208,9 +208,23 @@ Breakers
<w:t>&#xB7;</w:t><w:tab/> <w:t>&#xB7;</w:t><w:tab/>
</xsl:otherwise> </xsl:otherwise>
</xsl:choose> </xsl:choose>
</w:r><w:r>
<w:t><xsl:value-of select="normalize-space(text())" /></w:t>
</w:r> </w:r>
<xsl:choose>
<xsl:when test="count(child::*)=0">
<xsl:variable name="text">
<xsl:value-of select="substring-after(text(),' ')"/>
</xsl:variable>
<w:r><w:t>
<xsl:value-of select="normalize-space($text)"/>
</w:t></w:r>
</xsl:when>
<xsl:otherwise>
<!-- Strip out styles, they would need to be processed-->
<w:r><w:t>
<xsl:value-of select = "normalize-space(child::*)"/>
</w:t></w:r>
</xsl:otherwise>
</xsl:choose>
</w:p> </w:p>
</xsl:for-each> </xsl:for-each>
</xsl:template> </xsl:template>