From b8d9b748ac45250dcc34551c749feb1b3d91b09d Mon Sep 17 00:00:00 2001 From: Nathan Gray Date: Wed, 1 Feb 2012 00:03:31 +0000 Subject: [PATCH] - Re-arrange HTML tag order to account for simplistic newline handling (broke nested tags) - If unable to load/parse content after trying to convert HTML styles into target document styles, fall back and try again, but strip all tags this time. --- etemplate/inc/class.bo_merge.inc.php | 92 ++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 25 deletions(-) diff --git a/etemplate/inc/class.bo_merge.inc.php b/etemplate/inc/class.bo_merge.inc.php index 290531c461..edebc39d9b 100644 --- a/etemplate/inc/class.bo_merge.inc.php +++ b/etemplate/inc/class.bo_merge.inc.php @@ -71,6 +71,11 @@ abstract class bo_merge 'wrap' => 0, // Wrapping can break output ); + /** + * Parse HTML styles into target document style, if possible + */ + protected $parse_html_styles = true; + /** * Constructor * @@ -518,7 +523,20 @@ abstract class bo_merge // Remove spans with no attributes, linebreaks inside them cause problems '/(.*?)<\/span>/' => '$1' ); - $content = preg_replace(array_keys($replace_tags),array_values($replace_tags),$content); + $content = preg_replace(array_keys($replace_tags),array_values($replace_tags),$content, -1, $count); + + /* + In the case where you have something like (invalid - mismatched tags), + it takes multiple runs to get rid of both spans. So, loop. + OO.o files have not yet been shown to have this problem. + */ + $count = $i = 0; + do + { + $content = preg_replace('/(.*?)<\/span>/','$1',$content, -1, $count); + $i++; + } while($count > 0 && $i < 10); + //echo $content;die(); $doc = new DOMDocument(); $xslt = new XSLTProcessor(); @@ -531,31 +549,27 @@ abstract class bo_merge // XSLT transform known tags if($xslt) { - try + // does NOT work with php 5.2.6: Catchable fatal error: argument 1 to transformToXml() must be of type DOMDocument + //$element = new SimpleXMLelement($content); + $element = new DOMDocument('1.0', 'utf-8'); + $result = $element->loadXML($content); + if(!$result) { - // does NOT work with php 5.2.6: Catchable fatal error: argument 1 to transformToXml() must be of type DOMDocument - //$element = new SimpleXMLelement($content); - $element = new DOMDocument('1.0', 'utf-8'); - $element->loadXML($content); - $content = $xslt->transformToXml($element); + throw new Exception('Unable to parse merged document for styles. Check warnings in log for details.'); + } + $content = $xslt->transformToXml($element); //echo $content;die(); - // Word 2003 needs two declarations, add extra declaration back in - if($mimetype == 'application/xml' && $mso_application_progid == 'Word.Document' && strpos($content, ''.$content; - } - // Validate - /* - $doc = new DOMDocument(); - $doc->loadXML($content); - $doc->schemaValidate(*Schema (xsd) file*); - */ - } - catch (Exception $e) - { - error_log($e->getMessage()); - // Failed... + // Word 2003 needs two declarations, add extra declaration back in + if($mimetype == 'application/xml' && $mso_application_progid == 'Word.Document' && strpos($content, ''.$content; } + // Validate + /* + $doc = new DOMDocument(); + $doc->loadXML($content); + $doc->schemaValidate(*Schema (xsd) file*); + */ } } @@ -804,7 +818,7 @@ abstract class bo_merge // Tags we can replace with the target document's version $replace_tags = array(); // only keep tags, if we have xsl extension available - if (class_exists(XSLTProcessor) && class_exists(DOMDocument)) + if (class_exists(XSLTProcessor) && class_exists(DOMDocument) && $this->parse_html_styles) { switch($mimetype.$mso_application_progid) { @@ -866,6 +880,18 @@ abstract class bo_merge // replace

and
with CRLF (remove

and CRLF) $value = str_replace(array("\r","\n",'

','

','
'),array('','','',"\r\n","\r\n"),$value); $value = strip_tags($value,implode('',$replace_tags)); + + // Change ...\r\n to ...\r\n or simplistic line break below will mangle it + // Loop to catch things like Break:\r\n + if($mso_application_progid) + { + $count = $i = 0; + do + { + $value = preg_replace('/<(b|strong|i|em|u|span)\b([^>]*?)>(.*?)'."\r\n".'<\/\1>/u', '<$1$2>$3'."\r\n",$value,-1,$count); + $i++; + } while($count > 0 && $i < 10); // Limit of 10 chosen arbitrarily just in case + } } // replace all control chars (C0+C1) but CR (\015), LF (\012) and TAB (\011) (eg. vertical tabulators) with space // as they are not allowed in xml @@ -1150,9 +1176,25 @@ abstract class bo_merge // Apply HTML formatting to target document, if possible // check if we can use the XSL extension, to not give a fatal error and rendering whole merge-print non-functional - if (class_exists(XSLTProcessor) && class_exists(DOMDocument)) + if (class_exists(XSLTProcessor) && class_exists(DOMDocument) && $this->parse_html_styles) { - $this->apply_styles($merged, $mimetype); + try + { + $this->apply_styles($merged, $mimetype); + } + catch (Exception $e) + { + // Error converting HTML styles over + error_log($e->getMessage()); + error_log("Target document: $content_url, IDs: ". array2string($ids)); + + // Try again, but strip HTML so user gets something + $this->parse_html_styles = false; + if (!($merged =& $this->merge($content_url,$ids,$err,$mimetype,$fix))) + { + return $err; + } + } } if(!empty($name)) {