* Translate basic HTML formatting (bold, underline, italic, text & background color, bullets & lists, fonts, size) into the target document's formatting when merging into a template. Works for odt, docx and some xml.

This commit is contained in:
Nathan Gray 2011-08-30 16:08:34 +00:00
commit 98ec62d0e2
4 changed files with 999 additions and 7 deletions

View File

@ -313,12 +313,99 @@ abstract class bo_merge
{
$mimetype = 'application/rtf';
}
try {
return $this->merge_string($content,$ids,$err,$mimetype,$fix);
$content = $this->merge_string($content,$ids,$err,$mimetype,$fix);
} catch (Exception $e) {
$err = $e->getMessage();
return false;
}
return $content;
}
/**
 * Translate the basic HTML markup left in a merged document into the target document's own formatting
 *
 * The block level HTML elements (ol, ul, table) are first split out of the paragraph / run surrounding
 * them with regular expressions, then the whole document is transformed with the XSLT stylesheet
 * belonging to the document type.  Document types without a stylesheet are not touched.
 *
 * If any step fails, the reason is logged and $content is left exactly as it was passed in.
 *
 * @param string &$content merged document (XML), replaced by the transformed document on success
 * @param string $mimetype mimetype of the merged document
 */
protected function apply_styles (&$content, $mimetype)
{
	// MS Office 2003 xml files share one mimetype, the mso-application processing instruction tells them apart
	if ($mimetype == 'application/xml' &&
		preg_match('/'.preg_quote('<?mso-application progid="').'([^"]+)'.preg_quote('"?>').'/',substr($content,0,200),$matches))
	{
		$mso_application_progid = $matches[1];
	}
	else
	{
		$mso_application_progid = '';
	}

	// Regular expressions (pattern => replacement) preparing the markup for the stylesheet
	$replace_tags = array();
	// Stylesheet to apply, stays null for document types we have no stylesheet for
	$xslt_file = null;
	switch($mimetype.$mso_application_progid)
	{
		case 'application/vnd.oasis.opendocument.text':	// open office
		case 'application/vnd.oasis.opendocument.spreadsheet':
			// It seems easier to split the parent tags here
			$replace_tags = array(
				'/<(ol|ul|table)( [^>]*)?>/' => '</text:p><$1$2>',
				'/<\/(ol|ul|table)>/' => '</$1><text:p>',
			);
			$xslt_file = 'openoffice.xslt';
			break;
		case 'application/xmlWord.Document':	// Word 2003*/
		case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':	// ms office 2007
		case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
			// It seems easier to split the parent tags here
			$replace_tags = array(
				// Tables, lists don't go inside <w:p>
				'/<(ol|ul|table)( [^>]*)?>/' => '</w:t></w:r></w:p><$1$2>',
				'/<\/(ol|ul|table)>/' => '</$1><w:p><w:r><w:t>',
				// Fix for things other than text (newlines) inside table row
				'/<(td)( [^>]*)?>((?!<w:t>))(.*?)<\/td>[\s]*?/' => '<$1$2><w:t>$4</w:t></td>',
				'/<(li)(.*?)>(.*?)<\/\1>/' => '<$1 $2>$3</$1>',
				// Remove extra whitespace
				'/<w:t>[\s]+(.*?)<\/w:t>/' => '<w:t>$1</w:t>'
			);
			$xslt_file = $mimetype == 'application/xml' ? 'wordml.xslt' : 'msoffice.xslt';
			break;
	}
	if (is_null($xslt_file))
	{
		return;	// nothing we know how to style
	}

	try
	{
		// Work on a copy, so a failure can not leave a half converted document behind
		$styled = preg_replace(array_keys($replace_tags),array_values($replace_tags),$content);
		if (!is_string($styled))
		{
			throw new Exception('preg_replace() failed with error code '.preg_last_error());
		}

		$doc = new DOMDocument();
		if (!$doc->load(EGW_INCLUDE_ROOT.'/etemplate/templates/default/'.$xslt_file))
		{
			throw new Exception("could not load stylesheet $xslt_file");
		}
		$xslt = new XSLTProcessor();
		if (!$xslt->importStyleSheet($doc))
		{
			throw new Exception("could not import stylesheet $xslt_file");
		}

		// XSLT transform known tags, the constructor throws if the document is not well formed
		$element = new SimpleXMLElement($styled);
		$styled = @$xslt->transformToXml($element);
		if (!is_string($styled) || $styled === '')
		{
			throw new Exception("transformation with $xslt_file failed");
		}

		// Word 2003 needs two declarations, add extra declaration back in
		if($mimetype == 'application/xml' && $mso_application_progid == 'Word.Document' && strpos($styled, '<?xml') !== 0)
		{
			$styled = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'.$styled;
		}
		$content = $styled;
	}
	catch (Exception $e)
	{
		// Styling is best effort: keep the unstyled document and report why
		error_log(__METHOD__."() could not apply styles for $mimetype: ".$e->getMessage());
	}
}
/**
@ -478,8 +565,7 @@ abstract class bo_merge
return $contentstart.implode('\\par \\page\\pard\\plain',$contentrepeatpages).$contentend;
case 'application/vnd.oasis.opendocument.text':
case 'application/vnd.oasis.opendocument.spreadsheet':
// todo OO writer files
break;
return $contentstart.implode('<text:line-break />',$contentrepeatpages).$contentend;
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
return $contentstart.implode('<w:br w:type="page" />',$contentrep).$contentend;
@ -543,6 +629,7 @@ abstract class bo_merge
break;
}
//error_log(__METHOD__."('$document', ... ,$mimetype) --> $charset (egw=".translation::charset().', export='.$this->contacts->prefs['csv_charset'].')');
// do we need to convert charset
if ($charset && $charset != translation::charset())
{
@ -553,6 +640,26 @@ abstract class bo_merge
// Numeric fields
$names = array();
// Tags we can replace with the target document's version
$replace_tags = array();
switch($mimetype.$mso_application_progid)
{
case 'application/vnd.oasis.opendocument.text': // open office
case 'application/vnd.oasis.opendocument.spreadsheet':
$replace_tags = array(
'<b>','<strong>','<i>','<em>','<u>','<span>','<ol>','<ul>','<li>',
'<table>','<tr>','<td>',
);
break;
case 'application/xmlWord.Document': // Word 2003*/
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': // ms office 2007
case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
$replace_tags = array(
'<b>','<strong>','<i>','<em>','<u>','<span>','<ol>','<ul>','<li>',
'<table>','<tr>','<td>',
);
break;
}
// clean replacements from array values and html or html-entities, which mess up xml
foreach($replacements as $name => &$value)
{
@ -578,7 +685,7 @@ abstract class bo_merge
{
// replace </p> and <br /> with CRLF (remove <p> and CRLF)
$value = str_replace(array("\r","\n",'<p>','</p>','<br />'),array('','','',"\r\n","\r\n"),$value);
$value = strip_tags($value);
$value = strip_tags($value,implode('',$replace_tags));
}
// replace all control chars (C0+C1) but CR (\015), LF (\012) and TAB (\011) (eg. vertical tabulators) with space
// as they are not allowed in xml
@ -637,13 +744,13 @@ abstract class bo_merge
$break = '<text:line-break/>';
break;
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': // ms word 2007
$break = '<w:br/>';
$break = '</w:t><w:br/><w:t>';
break;
case 'application/xmlExcel.Sheet': // Excel 2003
$break = '&#10;';
break;
case 'application/xmlWord.Document': // Word 2003*/
$break = '<w:br/>';
$break = '</w:t><w:br/><w:t>';
break;
case 'text/html':
$break = '<br/>';
@ -654,7 +761,7 @@ abstract class bo_merge
break;
}
// now decode &, < and >, which need to be encoded as entities in xml
$replacements = str_replace(array('&','<','>',"\r","\n"),array('&amp;','&lt;','&gt;','',$break),$replacements);
$replacements = str_replace(array('&',"\r","\n"),array('&amp;','',$break),$replacements);
}
return str_replace(array_keys($replacements),array_values($replacements),$content);
}
@ -859,6 +966,10 @@ abstract class bo_merge
//error_log(__METHOD__."() !this->merge() err=$err");
return $err;
}
// Apply HTML formatting to target document, if possible
$this->apply_styles($merged, $mimetype);
if(!empty($name))
{
if(empty($ext))

View File

@ -0,0 +1,267 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:str="http://exslt.org/strings"
extension-element-prefixes="str"
>
<xsl:output method="xml" omit-xml-declaration="yes"/>
<!--
Convert a comma separated list of three decimal values (as found in rgb(r,g,b)) into hex digits.
The text in front of the first comma is handed to to-hex, then this template calls itself with
the text after that comma.  $count is the number of the component being processed: once it is
no longer below 3, the whole remaining value is handed to to-hex.
-->
<xsl:template name="rbga-to-hex">
<xsl:param name="rgba-val"/>
<xsl:param name="count" select="1"/>
<!-- $val: everything before the first comma, $tail: everything after it -->
<xsl:variable name="val" select="substring-before($rgba-val,',')"/>
<xsl:variable name="tail" select="substring-after($rgba-val,concat($val,','))"/>
<xsl:choose>
<xsl:when test="$count &lt; 3">
<xsl:call-template name="to-hex">
<xsl:with-param name="val" select="$val"/>
</xsl:call-template>
<xsl:call-template name="rbga-to-hex">
<xsl:with-param name="count" select="$count + 1"/>
<xsl:with-param name="rgba-val" select="$tail"/>
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<!-- last component: no comma left to split on -->
<xsl:call-template name="to-hex">
<xsl:with-param name="val" select="$rgba-val"/>
</xsl:call-template>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<!--
Convert one decimal value into a two digit hex value.
  val     text containing the decimal value, letters and punctuation are removed first
  max     upper limit the value is clamped to (default 255)
  min     lower limit the value is clamped to (default 0)
  hex-key digits used for the output, in ascending order
-->
<xsl:template name="to-hex">
	<xsl:param name="val"/>
	<xsl:param name="max" select="255"/>
	<xsl:param name="min" select="0"/>
	<xsl:param name="hex-key" select="'0123456789ABCDEF'"/>
	<!-- REMOVE NON-NUMERIC CHARACTERS
	     Kept under its own name: XSLT 1.0 makes it an error for a variable to shadow a parameter of the same template -->
	<xsl:variable name="digits"
		select="translate($val,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,.-_=+!@#$%^*():; ','')"/>
	<!-- insure that the rgb value is within 0-255 -->
	<xsl:variable name="num">
		<xsl:choose>
			<xsl:when test="$digits &gt; $max">
				<xsl:value-of select="$max"/>
			</xsl:when>
			<xsl:when test="$digits &lt; $min">
				<xsl:value-of select="$min"/>
			</xsl:when>
			<!-- insure that we have whole numbers -->
			<xsl:otherwise>
				<xsl:value-of select="round($digits)"/>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:variable>
	<!-- Return Hex Val: high digit first, then low digit -->
	<!-- substring(string, position, length) -->
	<xsl:value-of select="concat( substring($hex-key,(ceiling(($num - ceiling($num mod 16)) div 16)+1),1),
		substring($hex-key,($num mod 16)+1,1)
		)"/>
</xsl:template>
<!-- Identity template: copies every node and attribute that no other template matches, then processes its children and attributes -->
<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*"/>
</xsl:copy>
</xsl:template>
<!-- Try to do replacements completely in XSLT
-->
<!-- w:p doesn't work right now
<xsl:template match="w:p[descendant::ul|descendant::ol]">
<xsl:for-each select="node()|@*">
<xsl:choose>
<xsl:when test="descendant::ul|descendant::ol" >
<xsl:variable name="current" select="." />
<xsl:variable name="break" select="descendant::*[ul|ol|table]" />
Breakers
<xsl:copy-of select="$break" />
</xsl:template>
-->
<!--
Rebuild a run (w:r) which has HTML strong, em, u or span elements somewhere below it.
The template is also invoked by name from the table template.
Children of the run containing such HTML are expanded: each of their nodes becomes a w:r of its own,
with the formatting templates applied inside w:rPr when the node is, or contains, one of the HTML elements.
Children without such HTML are copied unchanged into a w:r of their own.
NOTE(review): the predicate [not(w:rPr)] binds to the attribute step only, and attributes have no
element children, so it does not appear to filter anything. Confirm the intent.
-->
<xsl:template name="apply-styles" match="w:r[descendant::strong|descendant::em|descendant::u|descendant::span]">
<xsl:for-each select="node()|@*[not(w:rPr)]">
<xsl:choose>
<xsl:when test="descendant::strong|descendant::em|descendant::u|descendant::span" >
<xsl:for-each select="node()|@*">
<xsl:choose>
<xsl:when test="descendant-or-self::strong|descendant-or-self::em|descendant-or-self::u|descendant-or-self::span" >
<!-- styled piece: run properties come from the templates matching the HTML element and its child elements, the text is the string value of the node -->
<w:r>
<w:rPr>
<xsl:apply-templates select=".|child::*" />
</w:rPr>
<w:t xml:space="preserve"><xsl:value-of select="." /></w:t>
</w:r>
</xsl:when>
<xsl:otherwise>
<!-- unstyled piece: copied into a run without properties -->
<w:r><w:t xml:space="preserve"><xsl:copy-of select="." /></w:t></w:r>
</xsl:otherwise>
</xsl:choose>
</xsl:for-each>
</xsl:when>
<xsl:otherwise>
<!-- no HTML below this child: keep it as it is, in its own run -->
<w:r>
<xsl:copy-of select="." />
</w:r>
</xsl:otherwise>
</xsl:choose>
</xsl:for-each>
</xsl:template>
<!-- Fix any bad breaks -->
<!-- A w:t with a w:br child is replaced by a w:t holding only its text nodes, followed by one w:br next to it -->
<xsl:template match="w:t[child::w:br]">
<w:t>
<xsl:copy-of select="text()"/>
</w:t>
<w:br/>
</xsl:template>
<!-- HTML i / em: emits only the italic property element, the element content is not processed here -->
<xsl:template match="i|em">
<w:i />
</xsl:template>
<!-- HTML b / strong: emits only the bold property element -->
<xsl:template match="b|strong">
<w:b />
</xsl:template>
<!-- HTML u: emits only the single underline property element -->
<xsl:template match="u">
<w:u w:val="single" />
</xsl:template>
<!-- Color & font -->
<!--
HTML span: translate the declarations of its style attribute into run properties.
Handles color, background-color, font-size and font-family, then processes directly nested spans.
Each declaration is whitespace normalised first, so "color:#f00; font-size:12px" (blank after the
semicolon) is recognised too.  Properties whose value can not be determined are not emitted.
-->
<xsl:template match="span">
	<xsl:variable name="style" select="str:tokenize(@style,';')" />
	<xsl:for-each select="$style">
		<xsl:variable name="decl" select="normalize-space(.)"/>
		<xsl:if test="starts-with($decl,'color:')">
			<xsl:variable name="hex">
				<xsl:choose>
					<xsl:when test="contains($decl, 'rgb(')">
						<xsl:call-template name="rbga-to-hex">
							<xsl:with-param name="rgba-val" select="substring-after($decl,':')"/>
						</xsl:call-template>
					</xsl:when>
					<xsl:otherwise>
						<xsl:value-of select="normalize-space(substring-after($decl,'#'))" />
					</xsl:otherwise>
				</xsl:choose>
			</xsl:variable>
			<!-- named colours give no hex value, an empty w:val would be invalid -->
			<xsl:if test="string($hex) != ''">
				<w:color w:val="{$hex}" />
			</xsl:if>
		</xsl:if>
		<xsl:if test="starts-with($decl,'background-color:')">
			<xsl:variable name="hex">
				<xsl:choose>
					<xsl:when test="contains($decl, 'rgb(')">
						<xsl:call-template name="rbga-to-hex">
							<xsl:with-param name="rgba-val" select="substring-after($decl,':')"/>
						</xsl:call-template>
					</xsl:when>
					<xsl:otherwise>
						<xsl:value-of select="normalize-space(substring-after($decl,'#'))" />
					</xsl:otherwise>
				</xsl:choose>
			</xsl:variable>
			<xsl:if test="string($hex) != ''">
				<!-- w:val is a required attribute of w:shd, "clear" means a plain fill without pattern -->
				<w:shd w:val="clear" w:color="auto" w:fill="{$hex}"/>
			</xsl:if>
		</xsl:if>
		<xsl:if test="starts-with($decl,'font-size')">
			<xsl:variable name="font-size" select="substring-after($decl,'font-size:')" />
			<!-- Approximate conversion that seems to work -->
			<xsl:variable name="size" select="ceiling(number(translate($font-size,translate($font-size,'0123456789',''),''))*2)"/>
			<!-- no digits in the value (eg. "large") gives NaN -->
			<xsl:if test="string($size) != 'NaN'">
				<w:sz w:val="{$size}"/>
				<w:szCs w:val="{$size}"/>
			</xsl:if>
		</xsl:if>
		<xsl:if test="starts-with($decl, 'font-family:')">
			<xsl:variable name="families" select="substring-after($decl,'font-family:')" />
			<!-- first entry of the list, or the whole value if only one font is given -->
			<xsl:variable name="first">
				<xsl:choose>
					<xsl:when test="contains($families,',')">
						<xsl:value-of select="substring-before($families,',')"/>
					</xsl:when>
					<xsl:otherwise>
						<xsl:value-of select="$families"/>
					</xsl:otherwise>
				</xsl:choose>
			</xsl:variable>
			<xsl:variable name="font-name" select="normalize-space(translate($first,&quot;&#39;&quot;,''))" />
			<xsl:if test="$font-name != ''">
				<w:rFonts w:ascii="{$font-name}" w:hAnsi="{$font-name}"/>
			</xsl:if>
		</xsl:if>
	</xsl:for-each>
	<xsl:apply-templates select="./span"/>
</xsl:template>
<!--
Unordered (bullet) & ordered (number) list
Faked using text.
-->
<!--
Lists are faked with text: every li becomes a paragraph with a hanging indent, starting with
either the position of the item and a dot (ol) or a bullet from the Symbol font (ul), then a tab
and the text of the item.
The text is the whole string value of the li, so text inside inline markup of the item
(eg. strong) is kept, though without its formatting.
-->
<xsl:template match="ul[child::li]|ol[child::li]">
	<xsl:for-each select="./li">
		<w:p>
			<w:pPr>
				<w:tabs>
					<w:tab w:leader="none" w:pos="707" w:val="left"/>
				</w:tabs>
				<w:ind w:hanging="283" w:left="707" w:right="0"/>
				<w:spacing w:after="0" w:before="0"/>
			</w:pPr>
			<w:r>
				<xsl:choose>
					<xsl:when test="name(..)='ol'">
						<w:rPr>
							<w:rFonts w:hint="default"/>
						</w:rPr>
						<w:t><xsl:number value="position()" format="1" />.</w:t><w:tab />
					</xsl:when>
					<xsl:otherwise>
						<w:rPr>
							<w:rFonts w:ascii="Symbol" w:cs="Symbol" w:hAnsi="Symbol" w:hint="default"/>
						</w:rPr>
						<w:t>&#xB7;</w:t><w:tab/>
					</xsl:otherwise>
				</xsl:choose>
			</w:r><w:r>
				<!-- text() alone would only give the first text node of the item -->
				<w:t><xsl:value-of select="normalize-space(.)" /></w:t>
			</w:r>
		</w:p>
	</xsl:for-each>
</xsl:template>
<!-- HTML Table -->
<!--
HTML table to a WordprocessingML table (w:tbl).
The table is left aligned and 4500 fiftieths of a percent (90%) wide.  One grid column is declared per
td of the first row.  If the HTML table has a border attribute above 0, table and cells get a
single black border of twice that size.  Cell content is run through the apply-styles template.
-->
<xsl:template match="table">
	<w:tbl>
		<w:tblPr>
			<w:tblW w:type="pct" w:w="4500"/>
			<!-- the alignment is carried by the w:val attribute -->
			<w:jc w:val="left"/>
			<xsl:if test="@border &gt; 0">
				<w:tblBorders>
					<xsl:variable name="width" select="number(@border)*2"/>
					<w:top w:color="000000" w:space="0" w:sz="{$width}" w:val="single"/>
					<w:left w:color="000000" w:space="0" w:sz="{$width}" w:val="single"/>
					<w:bottom w:color="000000" w:space="0" w:sz="{$width}" w:val="single"/>
					<w:right w:color="000000" w:space="0" w:sz="{$width}" w:val="single"/>
				</w:tblBorders>
			</xsl:if>
		</w:tblPr>
		<w:tblGrid>
			<xsl:for-each select="./tr[1]/td">
				<w:gridCol />
			</xsl:for-each>
		</w:tblGrid>
		<xsl:for-each select="./tr">
			<w:tr>
				<xsl:for-each select="./td">
					<w:tc>
						<xsl:if test="../../@border &gt; 0">
							<w:tcPr><w:tcBorders>
								<xsl:variable name="width" select="number(../../@border)*2"/>
								<w:top w:color="000000" w:space="0" w:sz="{$width}" w:val="single"/>
								<w:left w:color="000000" w:space="0" w:sz="{$width}" w:val="single"/>
								<w:bottom w:color="000000" w:space="0" w:sz="{$width}" w:val="single"/>
								<w:right w:color="000000" w:space="0" w:sz="{$width}" w:val="single"/>
							</w:tcBorders></w:tcPr>
						</xsl:if>
						<w:p>
							<!-- xsl:call-template may only contain xsl:with-param; the called template works on the current td -->
							<xsl:call-template name="apply-styles"/>
						</w:p>
					</w:tc>
				</xsl:for-each>
			</w:tr>
		</xsl:for-each>
	</w:tbl>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,367 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0"
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
xmlns:str="http://exslt.org/strings"
extension-element-prefixes="str"
>
<xsl:output method="xml" omit-xml-declaration="yes"/>
<!-- Extra text style named "Custom", copied into office:automatic-styles by the template matching that element -->
<xsl:variable name="custom-styles">
<style:style style:name="Custom" style:family="text" />
</xsl:variable>
<!-- Identity template: copies every node and attribute that no other template matches, then processes its children and attributes -->
<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*"/>
</xsl:copy>
</xsl:template>
<!-- Fonts -->
<!-- Keep the font declarations of the document and append the ones generated by extract-fonts -->
<xsl:template match="office:font-face-decls">
<xsl:copy>
<xsl:apply-templates/>
<xsl:call-template name="extract-fonts"/>
</xsl:copy>
</xsl:template>
<!-- Add in some known styles -->
<!--
Keep the automatic styles of the document and append the styles the other templates of this
stylesheet refer to: Tbold / Titalics / Tunderline, the custom-styles variable, the styles
generated by extract-styles, the list paragraph styles Pol / Pul, the table styles TableX*
and the list styles LO (numbered) and LU (bullets), both defined for levels 1 to 10.
-->
<xsl:template match="office:automatic-styles">
<xsl:copy>
<xsl:apply-templates/>
<!-- text styles used by the strong, em|i and u templates -->
<style:style style:name="Tbold" style:family="text">
<style:text-properties fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"/>
</style:style>
<style:style style:name="Titalics" style:family="text">
<style:text-properties fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"/>
</style:style>
<style:style style:name="Tunderline" style:family="text">
<style:text-properties style:text-underline-style="solid" style:text-underline-width="auto" style:text-underline-color="font-color"/>
</style:style>
<xsl:copy-of select="$custom-styles" />
<xsl:call-template name="extract-styles" />
<!-- Pre-made styles from http://fisheye.liip.ch/browse/PUB/fluxcms/branches/matrix/inc/bx/editors/ooo/html2odt.xsl?r=9331 -->
<!-- paragraph styles used by the li template, bound to the list styles LO and LU below -->
<style:style style:name="Pol" style:family="paragraph" style:parent-style-name="Standard" style:list-style-name="LO">
<style:text-properties style:text-position="0% 100%"/>
</style:style>
<style:style style:name="Pul" style:family="paragraph" style:parent-style-name="Standard" style:list-style-name="LU">
<style:text-properties style:text-position="0% 100%"/>
</style:style>
<!-- table, column and cell styles used by the table, th and td templates -->
<style:style style:name="TableX" style:family="table">
<style:table-properties style:width="16.999cm" table:align="margins"/>
</style:style>
<style:style style:name="TableX.A" style:family="table-column">
<style:table-column-properties style:column-width="5.666cm" style:rel-column-width="21845*"/>
</style:style>
<style:style style:name="TableX.A1" style:family="table-cell">
<style:table-cell-properties fo:padding="0.097cm" fo:border="0.002cm solid #000000" />
</style:style>
<style:style style:name="Numbering_20_Symbols" style:display-name="Numbering Symbols" style:family="text"/>
<style:style style:name="Bullet_20_Symbols" style:display-name="Bullet Symbols" style:family="text">
<style:text-properties style:font-name="StarSymbol" fo:font-size="9pt" style:font-name-asian="StarSymbol" style:font-size-asian="9pt" style:font-name-complex="StarSymbol" style:font-size-complex="9pt"/>
</style:style>
<!-- numbered list: every level is indented 0.635cm further than the one before -->
<text:list-style style:name="LO">
<text:list-level-style-number text:level="1" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1">
<style:list-level-properties text:space-before="0.635cm" text:min-label-width="0.635cm"/>
</text:list-level-style-number>
<text:list-level-style-number text:level="2" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1">
<style:list-level-properties text:space-before="1.27cm" text:min-label-width="0.635cm"/>
</text:list-level-style-number>
<text:list-level-style-number text:level="3" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1">
<style:list-level-properties text:space-before="1.905cm" text:min-label-width="0.635cm"/>
</text:list-level-style-number>
<text:list-level-style-number text:level="4" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1">
<style:list-level-properties text:space-before="2.54cm" text:min-label-width="0.635cm"/>
</text:list-level-style-number>
<text:list-level-style-number text:level="5" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1">
<style:list-level-properties text:space-before="3.175cm" text:min-label-width="0.635cm"/>
</text:list-level-style-number>
<text:list-level-style-number text:level="6" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1">
<style:list-level-properties text:space-before="3.81cm" text:min-label-width="0.635cm"/>
</text:list-level-style-number>
<text:list-level-style-number text:level="7" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1">
<style:list-level-properties text:space-before="4.445cm" text:min-label-width="0.635cm"/>
</text:list-level-style-number>
<text:list-level-style-number text:level="8" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1">
<style:list-level-properties text:space-before="5.08cm" text:min-label-width="0.635cm"/>
</text:list-level-style-number>
<text:list-level-style-number text:level="9" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1">
<style:list-level-properties text:space-before="5.715cm" text:min-label-width="0.635cm"/>
</text:list-level-style-number>
<text:list-level-style-number text:level="10" text:style-name="Numbering_20_Symbols" style:num-suffix="." style:num-format="1">
<style:list-level-properties text:space-before="6.35cm" text:min-label-width="0.635cm"/>
</text:list-level-style-number>
</text:list-style>
<!-- bullet list: same indents as LO, the bullet character (&#8226;) comes from the StarSymbol font -->
<text:list-style style:name="LU">
<text:list-level-style-bullet text:level="1" text:style-name="Bullet_20_Symbols" style:num-suffix="." text:bullet-char="&#8226;">
<style:list-level-properties text:space-before="0.635cm" text:min-label-width="0.635cm"/>
<style:text-properties style:font-name="StarSymbol"/>
</text:list-level-style-bullet>
<text:list-level-style-bullet text:level="2" text:style-name="Bullet_20_Symbols" style:num-suffix="." text:bullet-char="&#8226;">
<style:list-level-properties text:space-before="1.27cm" text:min-label-width="0.635cm"/>
<style:text-properties style:font-name="StarSymbol"/>
</text:list-level-style-bullet>
<text:list-level-style-bullet text:level="3" text:style-name="Bullet_20_Symbols" style:num-suffix="." text:bullet-char="&#8226;">
<style:list-level-properties text:space-before="1.905cm" text:min-label-width="0.635cm"/>
<style:text-properties style:font-name="StarSymbol"/>
</text:list-level-style-bullet>
<text:list-level-style-bullet text:level="4" text:style-name="Bullet_20_Symbols" style:num-suffix="." text:bullet-char="&#8226;">
<style:list-level-properties text:space-before="2.54cm" text:min-label-width="0.635cm"/>
<style:text-properties style:font-name="StarSymbol"/>
</text:list-level-style-bullet>
<text:list-level-style-bullet text:level="5" text:style-name="Bullet_20_Symbols" style:num-suffix="." text:bullet-char="&#8226;">
<style:list-level-properties text:space-before="3.175cm" text:min-label-width="0.635cm"/>
<style:text-properties style:font-name="StarSymbol"/>
</text:list-level-style-bullet>
<text:list-level-style-bullet text:level="6" text:style-name="Bullet_20_Symbols" style:num-suffix="." text:bullet-char="&#8226;">
<style:list-level-properties text:space-before="3.81cm" text:min-label-width="0.635cm"/>
<style:text-properties style:font-name="StarSymbol"/>
</text:list-level-style-bullet>
<text:list-level-style-bullet text:level="7" text:style-name="Bullet_20_Symbols" style:num-suffix="." text:bullet-char="&#8226;">
<style:list-level-properties text:space-before="4.445cm" text:min-label-width="0.635cm"/>
<style:text-properties style:font-name="StarSymbol"/>
</text:list-level-style-bullet>
<text:list-level-style-bullet text:level="8" text:style-name="Bullet_20_Symbols" style:num-suffix="." text:bullet-char="&#8226;">
<style:list-level-properties text:space-before="5.08cm" text:min-label-width="0.635cm"/>
<style:text-properties style:font-name="StarSymbol"/>
</text:list-level-style-bullet>
<text:list-level-style-bullet text:level="9" text:style-name="Bullet_20_Symbols" style:num-suffix="." text:bullet-char="&#8226;">
<style:list-level-properties text:space-before="5.715cm" text:min-label-width="0.635cm"/>
<style:text-properties style:font-name="StarSymbol"/>
</text:list-level-style-bullet>
<text:list-level-style-bullet text:level="10" text:style-name="Bullet_20_Symbols" style:num-suffix="." text:bullet-char="&#8226;">
<style:list-level-properties text:space-before="6.35cm" text:min-label-width="0.635cm"/>
<style:text-properties style:font-name="StarSymbol"/>
</text:list-level-style-bullet>
</text:list-style>
</xsl:copy>
</xsl:template>
<!-- Generate custom styles based on the span styles -->
<!--
Helper of extract-fonts and extract-styles: name of the first font of a font-family declaration.
  declaration  whitespace normalised declaration, eg. "font-family: 'Times New Roman', serif"
Gives the first entry of the comma separated list (or the whole value, if only one font is given)
without single quotes and surrounding blanks.  Both callers must use it, as the font name of the
text style has to match the name of the declared font face.
-->
<xsl:template name="first-font-family">
	<xsl:param name="declaration"/>
	<xsl:variable name="families" select="substring-after($declaration,'font-family:')"/>
	<xsl:variable name="first">
		<xsl:choose>
			<xsl:when test="contains($families,',')">
				<xsl:value-of select="substring-before($families,',')"/>
			</xsl:when>
			<xsl:otherwise>
				<xsl:value-of select="$families"/>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:variable>
	<xsl:value-of select="normalize-space(translate($first,&quot;&#39;&quot;,''))"/>
</xsl:template>
<!--
Declare a font face for the font-family of every span with a style attribute.
Declarations are whitespace normalised first, so they are recognised after "; " too.
No font face is emitted if no font name can be determined.
-->
<xsl:template name="extract-fonts">
	<xsl:for-each select="//span[@style]">
		<xsl:variable name="style" select="str:tokenize(@style,';')" />
		<xsl:for-each select="$style">
			<xsl:variable name="decl" select="normalize-space(.)"/>
			<xsl:if test="starts-with($decl,'font-family:')">
				<xsl:variable name="font-name">
					<xsl:call-template name="first-font-family">
						<xsl:with-param name="declaration" select="$decl"/>
					</xsl:call-template>
				</xsl:variable>
				<!-- second entry of the font list, only available if the list has at least three entries -->
				<xsl:variable name="generic" select="translate(substring-before(substring-after($decl,','),','),&quot;&#39; &quot; ,'')" />
				<xsl:if test="string($font-name) != ''">
					<style:font-face style:name="{$font-name}" svg:font-family="{$font-name}">
						<xsl:if test="$generic != ''">
							<xsl:attribute name="style:font-family-generic"><xsl:value-of select="$generic"/></xsl:attribute>
						</xsl:if>
					</style:font-face>
				</xsl:if>
			</xsl:if>
		</xsl:for-each>
	</xsl:for-each>
</xsl:template>
<!--
Generate one text style per span with a style attribute, named TSpan + generate-id() of the span,
which is the name the span template refers to.
color, background-color, font-size and font-family are translated.  All of them end up as
attributes of a single style:text-properties element, as a style may only contain one.
Properties whose value can not be determined are left out.
-->
<xsl:template name="extract-styles">
	<xsl:for-each select="//span[@style]">
		<xsl:variable name="style" select="str:tokenize(@style,';')" />
		<style:style style:name="TSpan{generate-id(.)}" style:family="text">
			<style:text-properties>
				<xsl:for-each select="$style">
					<xsl:variable name="decl" select="normalize-space(.)"/>
					<xsl:choose>
						<xsl:when test="starts-with($decl,'color:')">
							<xsl:variable name="hex">
								<xsl:choose>
									<xsl:when test="contains($decl, 'rgb(')">
										<xsl:call-template name="rbga-to-hex">
											<xsl:with-param name="rgba-val" select="substring-after($decl,':')"/>
										</xsl:call-template>
									</xsl:when>
									<xsl:otherwise>
										<xsl:value-of select="normalize-space(substring-after($decl,'#'))"/>
									</xsl:otherwise>
								</xsl:choose>
							</xsl:variable>
							<!-- named colours give no hex value, "#" alone would be invalid -->
							<xsl:if test="string($hex) != ''">
								<xsl:attribute name="fo:color"><xsl:value-of select="concat('#',$hex)"/></xsl:attribute>
							</xsl:if>
						</xsl:when>
						<xsl:when test="starts-with($decl,'background-color:')">
							<xsl:variable name="hex">
								<xsl:choose>
									<xsl:when test="contains($decl, 'rgb(')">
										<xsl:call-template name="rbga-to-hex">
											<xsl:with-param name="rgba-val" select="substring-after($decl,':')"/>
										</xsl:call-template>
									</xsl:when>
									<xsl:otherwise>
										<xsl:value-of select="normalize-space(substring-after($decl,'#'))"/>
									</xsl:otherwise>
								</xsl:choose>
							</xsl:variable>
							<xsl:if test="string($hex) != ''">
								<xsl:attribute name="fo:background-color"><xsl:value-of select="concat('#',$hex)"/></xsl:attribute>
							</xsl:if>
						</xsl:when>
						<xsl:when test="starts-with($decl,'font-size:')">
							<xsl:variable name="font-size" select="substring-after($decl,'font-size:')" />
							<!-- Approximate conversion that seems to work -->
							<xsl:variable name="size" select="ceiling(number(translate($font-size,translate($font-size,'0123456789',''),'')))"/>
							<!-- no digits in the value (eg. "large") gives NaN -->
							<xsl:if test="string($size) != 'NaN'">
								<xsl:attribute name="fo:font-size"><xsl:value-of select="concat($size,'pt')"/></xsl:attribute>
							</xsl:if>
						</xsl:when>
						<xsl:when test="starts-with($decl,'font-family:')">
							<xsl:variable name="font-name">
								<xsl:call-template name="first-font-family">
									<xsl:with-param name="declaration" select="$decl"/>
								</xsl:call-template>
							</xsl:variable>
							<xsl:if test="string($font-name) != ''">
								<xsl:attribute name="style:font-name"><xsl:value-of select="$font-name"/></xsl:attribute>
							</xsl:if>
						</xsl:when>
					</xsl:choose>
				</xsl:for-each>
			</style:text-properties>
		</style:style>
	</xsl:for-each>
</xsl:template>
<!-- Simple, use known styles -->
<!-- HTML b / strong: span with the Tbold text style. b has to be handled too, it is one of the tags the merge leaves in the content -->
<xsl:template match="b|strong">
<text:span text:style-name="Tbold"><xsl:apply-templates/></text:span>
</xsl:template>
<!-- HTML em / i: span with the Titalics text style, content is processed further -->
<xsl:template match="em|i">
<text:span text:style-name="Titalics"><xsl:apply-templates/></text:span>
</xsl:template>
<!-- HTML u: span with the Tunderline text style, content is processed further -->
<xsl:template match="u">
<text:span text:style-name="Tunderline"><xsl:apply-templates/></text:span>
</xsl:template>
<!-- Bullet list inside a text document: text:list using the list style LU -->
<xsl:template match="ul[ancestor::office:text]">
<text:list text:style-name="LU">
<xsl:apply-templates/>
</text:list>
</xsl:template>
<!-- Numbered list inside a text document: text:list using the list style LO -->
<xsl:template match="ol[ancestor::office:text]">
<text:list text:style-name="LO">
<xsl:apply-templates/>
</text:list>
</xsl:template>
<!-- You can't have lists in a table? Doesn't work in calc at least, so fake it-->
<!-- List inside a spreadsheet: one paragraph holding all items -->
<xsl:template match="ul[ancestor::office:spreadsheet] | ol[ancestor::office:spreadsheet]" >
<text:p><xsl:apply-templates/></text:p>
</xsl:template>
<!-- List item inside a spreadsheet: tab stop, bullet character, whitespace normalised text of the item and a line break. Items of ol get the bullet too -->
<xsl:template match="ul[ancestor::office:spreadsheet]/li | ol[ancestor::office:spreadsheet]/li" >
<text:tab-stop />&#8226; <xsl:value-of select="normalize-space()" /><text:line-break />
</xsl:template>
<!-- List item: text:list-item holding one paragraph, styled Pul if the parent is an ul and Pol otherwise -->
<xsl:template match="li">
<xsl:variable name="list_style">
<xsl:choose>
<xsl:when test="name(..) = 'ul'">Pul</xsl:when>
<xsl:otherwise>Pol</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<text:list-item><text:p text:style-name="{$list_style}">
<xsl:apply-templates/>
</text:p></text:list-item>
</xsl:template>
<!-- HTML table inside a text document: table named Table + generate-id(), with one repeated column declaration covering the td and th cells of the first row -->
<xsl:template match="table[ancestor::office:text]">
<table:table table:name="Table{generate-id(.)}" table:style-name="TableX">
<table:table-column table:style-name="TableX.A" table:number-columns-repeated="{count(tr[position() = 1]/td | tr[position() = 1]/th)}"/>
<xsl:apply-templates/>
</table:table>
</xsl:template>
<!-- You can't have tables in a table? Doesn't work in calc at least, so fake it-->
<!-- HTML table inside a spreadsheet: its content is put into a single paragraph -->
<xsl:template match="table[ancestor::office:spreadsheet]" >
<text:p>
<xsl:apply-templates/>
</text:p>
</xsl:template>
<!-- Row containing header cells: table row wrapped in table:table-header-rows -->
<xsl:template match="tr[th]">
<table:table-header-rows><table:table-row>
<xsl:apply-templates/>
</table:table-row></table:table-header-rows>
</xsl:template>
<!-- Header cell: same as td, cell using the style TableX.A1 with its content inside a paragraph, as cells hold paragraphs and not plain text -->
<xsl:template match="th">
<table:table-cell table:style-name="TableX.A1">
<text:p><xsl:apply-templates/></text:p>
</table:table-cell>
</xsl:template>
<!-- Cell: table cell using the style TableX.A1, content inside one paragraph -->
<xsl:template match="td">
<table:table-cell table:style-name="TableX.A1">
<text:p><xsl:apply-templates/></text:p>
</table:table-cell>
</xsl:template>
<!-- Row: table row, cells are processed by their own templates -->
<xsl:template match="tr">
<table:table-row>
<xsl:apply-templates/>
</table:table-row>
</xsl:template>
<!-- Row inside a spreadsheet: only its content followed by a line break, no table row element -->
<xsl:template match="tr[ancestor::office:spreadsheet]" >
<xsl:apply-templates/><text:line-break />
</xsl:template>
<!-- HTML link: text:a with the href attribute copied to xlink:href -->
<xsl:template match="a">
<text:a xlink:href="{@href}">
<xsl:apply-templates/>
</text:a>
</xsl:template>
<!-- Need to add styles -->
<!-- HTML span: refers to the text style TSpan + generate-id() of the span, the name extract-styles uses for the style it generates per span with a style attribute -->
<xsl:template match="span">
<text:span text:style-name="TSpan{generate-id(.)}"><xsl:apply-templates/></text:span>
</xsl:template>
<!-- Convert rgb(r,g,b) to hex RGB values -->
<!--
Works through a comma separated list of three decimal values: the text in front of the first comma
is handed to to-hex, then this template calls itself with the text after that comma.
$count is the number of the component being processed: once it is no longer below 3,
the whole remaining value is handed to to-hex.
-->
<xsl:template name="rbga-to-hex">
<xsl:param name="rgba-val"/>
<xsl:param name="count" select="1"/>
<!-- $val: everything before the first comma, $tail: everything after it -->
<xsl:variable name="val" select="substring-before($rgba-val,',')"/>
<xsl:variable name="tail" select="substring-after($rgba-val,concat($val,','))"/>
<xsl:choose>
<xsl:when test="$count &lt; 3">
<xsl:call-template name="to-hex">
<xsl:with-param name="val" select="$val"/>
</xsl:call-template>
<xsl:call-template name="rbga-to-hex">
<xsl:with-param name="count" select="$count + 1"/>
<xsl:with-param name="rgba-val" select="$tail"/>
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<!-- last component: no comma left to split on -->
<xsl:call-template name="to-hex">
<xsl:with-param name="val" select="$rgba-val"/>
</xsl:call-template>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<!--
Convert one decimal value into a two digit hex value.
  val     text containing the decimal value, letters and punctuation are removed first
  max     upper limit the value is clamped to (default 255)
  min     lower limit the value is clamped to (default 0)
  hex-key digits used for the output, in ascending order
-->
<xsl:template name="to-hex">
	<xsl:param name="val"/>
	<xsl:param name="max" select="255"/>
	<xsl:param name="min" select="0"/>
	<xsl:param name="hex-key" select="'0123456789abcdef'"/>
	<!-- REMOVE NON-NUMERIC CHARACTERS
	     Kept under its own name: XSLT 1.0 makes it an error for a variable to shadow a parameter of the same template -->
	<xsl:variable name="digits"
		select="translate($val,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,.-_=+!@#$%^*():; ','')"/>
	<!-- insure that the rgb value is within 0-255 -->
	<xsl:variable name="num">
		<xsl:choose>
			<xsl:when test="$digits &gt; $max">
				<xsl:value-of select="$max"/>
			</xsl:when>
			<xsl:when test="$digits &lt; $min">
				<xsl:value-of select="$min"/>
			</xsl:when>
			<!-- insure that we have whole numbers -->
			<xsl:otherwise>
				<xsl:value-of select="round($digits)"/>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:variable>
	<!-- Return Hex Val: high digit first, then low digit -->
	<!-- substring(string, position, length) -->
	<xsl:value-of select="concat( substring($hex-key,(ceiling(($num - ceiling($num mod 16)) div 16)+1),1),
		substring($hex-key,($num mod 16)+1,1)
		)"/>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,247 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:w="http://schemas.microsoft.com/office/word/2003/wordml"
xmlns:str="http://exslt.org/strings"
extension-element-prefixes="str"
>
<!-- Alternative: the Office Open XML (docx) WordprocessingML namespace would be
     xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" -->
<xsl:output method="xml" omit-xml-declaration="yes"/>
<!--
  Convert the components of a CSS rgb(r,g,b) value into a hex RRGGBB string.

  Parameters:
    rgba-val  the comma separated part of the colour value that has not been
              processed yet
    count     1-based index of the channel handled by this call (default 1)

  Channels 1 and 2 are cut off at the next comma and the template calls itself
  with the remainder; channel 3 ends the recursion.  Every channel is
  formatted by the "to-hex" template, which discards non-digit characters such
  as "rgb(" and ")".
-->
<xsl:template name="rbga-to-hex">
  <xsl:param name="rgba-val"/>
  <xsl:param name="count" select="1"/>
  <xsl:choose>
    <xsl:when test="$count &lt; 3">
      <xsl:call-template name="to-hex">
        <xsl:with-param name="val" select="substring-before($rgba-val,',')"/>
      </xsl:call-template>
      <xsl:call-template name="rbga-to-hex">
        <xsl:with-param name="count" select="$count + 1"/>
        <xsl:with-param name="rgba-val" select="substring-after($rgba-val,',')"/>
      </xsl:call-template>
    </xsl:when>
    <!-- Last colour channel followed by an alpha component ("b,a)"): cut the
         alpha off first, otherwise to-hex would drop the comma and read both
         numbers as a single value. -->
    <xsl:when test="contains($rgba-val,',')">
      <xsl:call-template name="to-hex">
        <xsl:with-param name="val" select="substring-before($rgba-val,',')"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:call-template name="to-hex">
        <xsl:with-param name="val" select="$rgba-val"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
  Format one colour channel as a two-digit hexadecimal number.

  Parameters:
    val      raw channel text; every character that is not a digit 0-9 is
             discarded before conversion
    max      upper clamp bound (default 255)
    min      lower clamp bound (default 0)
    hex-key  the 16 output digits in ascending order
-->
<xsl:template name="to-hex">
  <xsl:param name="val"/>
  <xsl:param name="max" select="255"/>
  <xsl:param name="min" select="0"/>
  <xsl:param name="hex-key" select="'0123456789ABCDEF'"/>
  <!-- REMOVE NON-NUMERIC CHARACTERS
       The inner translate() yields every non-digit character of $val, the
       outer one deletes exactly those.  The result gets its own name because
       XSLT 1.0 does not allow a variable to shadow a param of the same
       template. -->
  <xsl:variable name="digits"
                select="translate($val, translate($val,'0123456789',''), '')"/>
  <!-- insure that the rgb value is within 0-255 -->
  <xsl:variable name="num">
    <xsl:choose>
      <xsl:when test="$digits &gt; $max">
        <xsl:value-of select="$max"/>
      </xsl:when>
      <xsl:when test="$digits &lt; $min">
        <xsl:value-of select="$min"/>
      </xsl:when>
      <!-- insure that we have whole numbers -->
      <xsl:otherwise>
        <xsl:value-of select="round($digits)"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <!-- Return Hex Val: high nibble first, then low nibble.
       substring(string, position, length) counts positions from 1, hence
       the "+ 1" on both lookups. -->
  <xsl:value-of select="concat(substring($hex-key, floor($num div 16) + 1, 1),
                               substring($hex-key, ($num mod 16) + 1, 1))"/>
</xsl:template>
<!-- Identity rule: anything without a more specific template is copied
     unchanged, and its attributes and children are processed recursively. -->
<xsl:template match="@*|node()">
  <xsl:copy>
    <xsl:apply-templates select="@*"/>
    <xsl:apply-templates select="node()"/>
  </xsl:copy>
</xsl:template>
<!-- Try to do replacements completely in XSLT
-->
<!-- w:p doesn't work right now
<xsl:template match="w:p[descendant::ul|descendant::ol]">
<xsl:for-each select="node()|@*">
<xsl:choose>
<xsl:when test="descendant::ul|descendant::ol" >
<xsl:variable name="current" select="." />
<xsl:variable name="break" select="descendant::*[ul|ol|table]" />
Breakers
<xsl:copy-of select="$break" />
</xsl:template>
-->
<!--
  Split a run that contains HTML inline markup into several WordML runs.
  Applies to every w:r with a strong, em, u or span descendant and can also be
  invoked by name ("apply-styles") on the current node.
  NOTE(review): b and i are not part of the tests below although a template
  for b|strong and i|em exists in this stylesheet; presumably the input only
  carries strong/em at this point. Confirm against the caller.
-->
<xsl:template match="w:r[descendant::strong|descendant::em|descendant::u|descendant::span]" name="apply-styles">
<!-- Visit every child node and attribute of the current node.
     NOTE(review): the predicate [not(w:rPr)] is attached to the attribute
     step; attributes have no child elements, so it filters nothing. It looks
     like it was meant to skip an existing w:rPr. Confirm. -->
<xsl:for-each select="node()|@*[not(w:rPr)]">
<xsl:choose>
<!-- This child contains styled markup: look at its children one by one -->
<xsl:when test="descendant::strong|descendant::em|descendant::u|descendant::span" >
<xsl:for-each select="node()|@*">
<xsl:choose>
<xsl:when test="descendant-or-self::strong|descendant-or-self::em|descendant-or-self::u|descendant-or-self::span" >
<!-- Styled piece: a run of its own. The run properties are whatever the
     templates matching this node and its child elements produce; the text is
     the string value of the node. -->
<w:r>
<w:rPr>
<xsl:apply-templates select=".|child::*" />
</w:rPr>
<w:t><xsl:value-of select="." /></w:t>
</w:r>
</xsl:when>
<xsl:otherwise>
<!-- Unstyled piece: copied as is into a plain run -->
<w:r><w:t><xsl:copy-of select="." /></w:t></w:r>
</xsl:otherwise>
</xsl:choose>
</xsl:for-each>
</xsl:when>
<xsl:otherwise>
<!-- No styled markup below this child: copy it into a run of its own -->
<w:r>
<xsl:copy-of select="." />
</w:r>
</xsl:otherwise>
</xsl:choose>
</xsl:for-each>
</xsl:template>
<!-- Fix any bad breaks: when a w:br ended up inside a w:t, keep only the text
     children in the w:t and write one w:br after it. -->
<xsl:template match="w:t[w:br]">
  <w:t>
    <xsl:for-each select="text()">
      <xsl:value-of select="."/>
    </xsl:for-each>
  </w:t>
  <w:br/>
</xsl:template>
<!-- HTML inline formatting elements and the WordML run property each one is
     replaced with. -->
<!-- italic -->
<xsl:template match="em|i">
  <w:i/>
</xsl:template>
<!-- bold -->
<xsl:template match="strong|b">
  <w:b/>
</xsl:template>
<!-- single underline -->
<xsl:template match="u">
  <w:u w:val="single"/>
</xsl:template>
<!-- Color & font -->
<!--
  Translate the CSS declarations in a span's style attribute into WordML run
  properties.  The attribute is split at ";" with the EXSLT str:tokenize
  function and each declaration is handled on its own:

    color:             w:color
    background-color:  w:shd (fill)
    font-size:         w:sz
    font-family:       w:rFonts (first listed family only)

  Colours are taken from an rgb(...) value (via "rbga-to-hex") or from the
  text after "#".  A property is only written when a value could be derived,
  so no attribute ends up empty or "NaN".  Directly nested spans are processed
  afterwards.
-->
<xsl:template match="span">
  <xsl:for-each select="str:tokenize(@style,';')">
    <!-- Declarations after a "; " separator start with a blank, which would
         defeat the starts-with() tests below. -->
    <xsl:variable name="decl" select="normalize-space(.)"/>
    <xsl:choose>
      <xsl:when test="starts-with($decl,'color:') or starts-with($decl,'background-color:')">
        <xsl:variable name="hex">
          <xsl:choose>
            <xsl:when test="contains($decl,'rgb(')">
              <xsl:call-template name="rbga-to-hex">
                <xsl:with-param name="rgba-val" select="substring-after($decl,':')"/>
              </xsl:call-template>
            </xsl:when>
            <xsl:otherwise>
              <xsl:value-of select="substring-after($decl,'#')"/>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:variable>
        <!-- Values that are neither rgb(...) nor #hex (e.g. colour names)
             leave $hex empty; skip them. -->
        <xsl:if test="string($hex) != ''">
          <xsl:choose>
            <xsl:when test="starts-with($decl,'color:')">
              <w:color w:val="{$hex}"/>
            </xsl:when>
            <xsl:otherwise>
              <w:shd w:fill="{$hex}" w:val="clear"/>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:if>
      </xsl:when>
      <xsl:when test="starts-with($decl,'font-size:')">
        <xsl:variable name="font-size" select="substring-after($decl,'font-size:')"/>
        <!-- Approximate conversion that seems to work: keep only the digits
             of the value and multiply by 1.5. -->
        <xsl:variable name="size"
                      select="ceiling(number(translate($font-size,translate($font-size,'0123456789',''),''))*1.5)"/>
        <!-- A value without digits (e.g. "large") converts to NaN -->
        <xsl:if test="string($size) != 'NaN'">
          <w:sz w:val="{$size}"/>
        </xsl:if>
      </xsl:when>
      <xsl:when test="starts-with($decl,'font-family:')">
        <xsl:variable name="families" select="substring-after($decl,'font-family:')"/>
        <!-- Use the first family of a comma separated list, or the whole
             value when only one family is given. -->
        <xsl:variable name="first-family">
          <xsl:choose>
            <xsl:when test="contains($families,',')">
              <xsl:value-of select="substring-before($families,',')"/>
            </xsl:when>
            <xsl:otherwise>
              <xsl:value-of select="$families"/>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:variable>
        <!-- Drop single and double quotes around the name and trim blanks -->
        <xsl:variable name="font-name"
                      select="normalize-space(translate($first-family,concat(&quot;&#39;&quot;,'&quot;'),''))"/>
        <xsl:if test="$font-name != ''">
          <w:rFonts w:ascii="{$font-name}"/>
        </xsl:if>
      </xsl:when>
    </xsl:choose>
  </xsl:for-each>
  <xsl:apply-templates select="span"/>
</xsl:template>
<!--
  Unordered (bullet) and ordered (numbered) lists.
  Numbers determined by examining a docx file from OpenOffice.org

  Every li child becomes one paragraph with a hanging indent.  The first run
  holds the marker followed by a tab: the item's position and a "." for ol,
  a Symbol-font bullet otherwise.  The second run holds the item text.
-->
<xsl:template match="ul[child::li]|ol[child::li]">
  <xsl:for-each select="./li">
    <w:p>
      <w:pPr>
        <w:tabs>
          <w:tab w:leader="none" w:pos="707" w:val="left"/>
        </w:tabs>
        <w:ind w:hanging="283" w:left="707" w:right="0"/>
      </w:pPr>
      <w:r>
        <xsl:choose>
          <xsl:when test="name(..)='ol'">
            <w:t><xsl:number value="position()" format="1" />.</w:t><w:tab/>
          </xsl:when>
          <xsl:otherwise>
            <w:rPr>
              <w:rFonts w:ascii="Symbol" w:cs="Symbol" w:hint="default"/>
            </w:rPr>
            <w:t>&#xB7;</w:t><w:tab/>
          </xsl:otherwise>
        </xsl:choose>
      </w:r><w:r>
        <!-- Use the string value of the whole item.  text() would supply only
             the first text node, losing everything inside or after inline
             markup such as <b> within the li. -->
        <w:t><xsl:value-of select="normalize-space(.)" /></w:t>
      </w:r>
    </w:p>
  </xsl:for-each>
</xsl:template>
<!-- HTML Table -->
<!--
  Convert an HTML table into a WordML table spanning the full width
  (5000 pct).  One w:gridCol is written per td of the first row; every tr
  becomes a w:tr and every td a w:tc holding a single paragraph.  The cell
  content is produced by the named template "apply-styles", which runs with
  the td as the current node.
-->
<xsl:template match="table">
  <w:tbl>
    <w:tblPr>
      <w:jc w:val="left"/>
      <w:tblW w:w="5000" w:type="pct"/>
    </w:tblPr>
    <w:tblGrid>
      <xsl:for-each select="./tr[1]/td">
        <w:gridCol />
      </xsl:for-each>
    </w:tblGrid>
    <xsl:for-each select="./tr">
      <w:tr>
        <xsl:for-each select="./td">
          <w:tc>
            <w:p>
              <!-- XSLT 1.0 permits only xsl:with-param inside
                   xsl:call-template, so the literal w:r wrapper that used to
                   sit here was invalid content and has been removed. -->
              <xsl:call-template name="apply-styles"/>
            </w:p>
          </w:tc>
        </xsl:for-each>
      </w:tr>
    </xsl:for-each>
  </w:tbl>
</xsl:template>
</xsl:stylesheet>