mirror of
https://github.com/EGroupware/egroupware.git
synced 2024-11-23 16:33:17 +01:00
664 lines
20 KiB
PHP
664 lines
20 KiB
PHP
<?php
|
|
|
|
include_once 'XML/WBXML.php';
|
|
include_once 'XML/WBXML/DTDManager.php';
|
|
include_once 'XML/WBXML/ContentHandler.php';
|
|
|
|
/**
|
|
* $Horde: framework/XML_WBXML/WBXML/Decoder.php,v 1.36 2006/01/01 21:10:25 jan Exp $
|
|
*
|
|
* Copyright 2003-2006 Anthony Mills <amills@pyramid6.com>
|
|
*
|
|
* See the enclosed file COPYING for license information (LGPL). If you
|
|
* did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
|
|
*
|
|
* From Binary XML Content Format Specification Version 1.3, 25 July
|
|
* 2001 found at http://www.wapforum.org
|
|
*
|
|
* @package XML_WBXML
|
|
*/
|
|
class XML_WBXML_Decoder extends XML_WBXML_ContentHandler {
|
|
|
|
/**
|
|
* Document Public Identifier type
|
|
* 1 mb_u_int32 well known type
|
|
* 2 string table
|
|
* from spec but converted into a string.
|
|
*
|
|
* Document Public Identifier
|
|
* Used with dpiType.
|
|
*/
|
|
var $_dpi;
|
|
|
|
/**
|
|
* String table as defined in 5.7
|
|
*/
|
|
var $_stringTable = array();
|
|
|
|
/**
|
|
* Content handler.
|
|
* Currently just outputs raw XML.
|
|
*/
|
|
var $_ch;
|
|
|
|
var $_tagDTD;
|
|
|
|
var $_prevAttributeDTD;
|
|
|
|
var $_attributeDTD;
|
|
|
|
/**
|
|
* State variables.
|
|
*/
|
|
var $_tagStack = array();
|
|
var $_isAttribute;
|
|
var $_isData = false;
|
|
|
|
var $_error = false;
|
|
|
|
/**
|
|
* The DTD Manager.
|
|
*
|
|
* @var XML_WBXML_DTDManager
|
|
*/
|
|
var $_dtdManager;
|
|
|
|
/**
|
|
* The string position.
|
|
*
|
|
* @var integer
|
|
*/
|
|
var $_strpos;
|
|
|
|
/**
|
|
* Constructor.
|
|
*/
|
|
function XML_WBXML_Decoder()
|
|
{
|
|
$this->_dtdManager = &new XML_WBXML_DTDManager();
|
|
}
|
|
|
|
/**
|
|
* Sets the contentHandler that will receive the output of the
|
|
* decoding.
|
|
*
|
|
* @param XML_WBXML_ContentHandler $ch The contentHandler
|
|
*/
|
|
function setContentHandler(&$ch) {
|
|
$this->_ch = &$ch;
|
|
}
|
|
/**
|
|
* Return one byte from the input stream.
|
|
*
|
|
* @param string $input The WBXML input string.
|
|
*/
|
|
function getByte($input)
|
|
{
|
|
$value = $input{$this->_strpos++};
|
|
$value = ord($value);
|
|
|
|
return $value;
|
|
}
|
|
|
|
/**
|
|
* Takes a WBXML input document and returns decoded XML.
|
|
* However the preferred and more effecient method is to
|
|
* use decode() rather than decodeToString() and have an
|
|
* appropriate contentHandler deal with the decoded data.
|
|
*
|
|
* @param string $wbxml The WBXML document to decode.
|
|
*
|
|
* @return string The decoded XML document.
|
|
*/
|
|
function decodeToString($wbxml)
|
|
{
|
|
$this->_ch = &new XML_WBXML_ContentHandler();
|
|
|
|
$r = $this->decode($wbxml);
|
|
if (is_a($r, 'PEAR_Error')) {
|
|
return $r;
|
|
}
|
|
return $this->_ch->getOutput();
|
|
}
|
|
|
|
/**
|
|
* Takes a WBXML input document and decodes it.
|
|
* Decoding result is directly passed to the contentHandler.
|
|
* A contenthandler must be set using setContentHandler
|
|
* prior to invocation of this method
|
|
*
|
|
* @param string $wbxml The WBXML document to decode.
|
|
*
|
|
* @return mixed True on success or PEAR_Error.
|
|
*/
|
|
function decode($wbxml)
|
|
{
|
|
$this->_error = false; // reset state
|
|
$this->_strpos = 0;
|
|
|
|
if (empty($this->_ch)) {
|
|
return $this->raiseError('No Contenthandler defined.');
|
|
}
|
|
|
|
// Get Version Number from Section 5.4
|
|
// version = u_int8
|
|
// currently 1, 2 or 3
|
|
$this->_wbxmlVersion = $this->getVersionNumber($wbxml);
|
|
|
|
// Get Document Public Idetifier from Section 5.5
|
|
// publicid = mb_u_int32 | (zero index)
|
|
// zero = u_int8
|
|
// Containing the value zero (0)
|
|
// The actual DPI is determined after the String Table is read.
|
|
$dpiStruct = $this->getDocumentPublicIdentifier($wbxml);
|
|
// Get Charset from 5.6
|
|
// charset = mb_u_int32
|
|
$this->_charset = $this->getCharset($wbxml);
|
|
|
|
// Get String Table from 5.7
|
|
// strb1 = length *byte
|
|
$this->retrieveStringTable($wbxml);
|
|
|
|
// Get Document Public Idetifier from Section 5.5.
|
|
$this->_dpi = $this->getDocumentPublicIdentifierImpl($dpiStruct['dpiType'],
|
|
$dpiStruct['dpiNumber'],
|
|
$this->_stringTable);
|
|
|
|
// Now the real fun begins.
|
|
// From Sections 5.2 and 5.8
|
|
|
|
|
|
// Default content handler.
|
|
$this->_dtdManager = &new XML_WBXML_DTDManager();
|
|
|
|
// Get the starting DTD.
|
|
$this->_tagDTD = $this->_dtdManager->getInstance($this->_dpi);
|
|
|
|
if (!$this->_tagDTD) {
|
|
return $this->raiseError('No DTD found for '
|
|
. $this->_dpi . '/'
|
|
. $dpiStruct['dpiNumber']);
|
|
}
|
|
|
|
$this->_attributeDTD = $this->_tagDTD;
|
|
|
|
while (empty($this->_error) && $this->_strpos < strlen($wbxml)) {
|
|
$this->_decode($wbxml);
|
|
}
|
|
if (!empty($this->_error)) {
|
|
return $this->_error;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
function getVersionNumber($input)
|
|
{
|
|
return $this->getByte($input);
|
|
}
|
|
|
|
function getDocumentPublicIdentifier($input)
|
|
{
|
|
$i = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
|
|
if ($i == 0) {
|
|
return array('dpiType' => 2,
|
|
'dpiNumber' => $this->getByte($input));
|
|
} else {
|
|
return array('dpiType' => 1,
|
|
'dpiNumber' => $i);
|
|
}
|
|
}
|
|
|
|
function getDocumentPublicIdentifierImpl($dpiType, $dpiNumber)
|
|
{
|
|
if ($dpiType == 1) {
|
|
return XML_WBXML::getDPIString($dpiNumber);
|
|
} else {
|
|
return $this->getStringTableEntry($dpiNumber);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns the character encoding. Only default character
|
|
* encodings from J2SE are supported. From
|
|
* http://www.iana.org/assignments/character-sets and
|
|
* http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html
|
|
*/
|
|
function getCharset($input)
|
|
{
|
|
$cs = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
|
|
return XML_WBXML::getCharsetString($cs);
|
|
}
|
|
|
|
/**
|
|
* Retrieves the string table.
|
|
* The string table consists of an mb_u_int32 length
|
|
* and then length bytes forming the table.
|
|
* References to the string table refer to the
|
|
* starting position of the (null terminated)
|
|
* string in this table.
|
|
*/
|
|
function retrieveStringTable($input)
|
|
{
|
|
$size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
|
|
$this->_stringTable = substr($input, $this->_strpos, $size);
|
|
$this->_strpos += $size;
|
|
// print "stringtable($size):" . $this->_stringTable ."\n";
|
|
}
|
|
|
|
function getStringTableEntry($index)
|
|
{
|
|
if ($index >= strlen($this->_stringTable)) {
|
|
$this->_error =
|
|
$this->_ch->raiseError('Invalid offset ' . $index
|
|
. ' value encountered around position '
|
|
. $this->_strpos
|
|
. '. Broken wbxml?');
|
|
return '';
|
|
}
|
|
|
|
// copy of method termstr but without modification of this->_strpos
|
|
|
|
$str = '#'; // must start with nonempty string to allow array access
|
|
|
|
$i = 0;
|
|
$ch = $this->_stringTable[$index++];
|
|
if (ord($ch) == 0) {
|
|
return ''; // don't return '#'
|
|
}
|
|
|
|
while (ord($ch) != 0) {
|
|
$str[$i++] = $ch;
|
|
if ($index >= strlen($this->_stringTable)) {
|
|
break;
|
|
}
|
|
$ch = $this->_stringTable[$index++];
|
|
}
|
|
// print "string table entry: $str\n";
|
|
return $str;
|
|
|
|
}
|
|
|
|
function _decode($input)
|
|
{
|
|
$token = $this->getByte($input);
|
|
$str = '';
|
|
|
|
#print "position: " . $this->_strpos . " token: " . $token . " str10: " . substr($input, $this->_strpos, 10) . "\n"; // @todo: remove debug output
|
|
|
|
switch ($token) {
|
|
case XML_WBXML_GLOBAL_TOKEN_STR_I:
|
|
// Section 5.8.4.1
|
|
$str = $this->termstr($input);
|
|
$this->_ch->characters($str);
|
|
// print "str:$str\n"; // @TODO Remove debug code
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_STR_T:
|
|
// Section 5.8.4.1
|
|
$x = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
|
|
$str = $this->getStringTableEntry($x);
|
|
$this->_ch->characters($str);
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_I_0:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_I_1:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_I_2:
|
|
// Section 5.8.4.2
|
|
$str = $this->termstr($input);
|
|
$this->_ch->characters($str);
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_T_0:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_T_1:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_T_2:
|
|
// Section 5.8.4.2
|
|
$str = $this->getStringTableEnty(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
|
|
$this->_ch->characters($str);
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_0:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_1:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_2:
|
|
// Section 5.8.4.2
|
|
$extension = $this->getByte($input);
|
|
$this->_ch->characters($extension);
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_ENTITY:
|
|
// Section 5.8.4.3
|
|
// UCS-4 chracter encoding?
|
|
$entity = $this->entity(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
|
|
|
|
$this->_ch->characters('&#' . $entity . ';');
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_PI:
|
|
// Section 5.8.4.4
|
|
// throw new IOException
|
|
// die("WBXML global token processing instruction(PI, " + token + ") is unsupported!\n");
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_LITERAL:
|
|
// Section 5.8.4.5
|
|
$str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
|
|
$this->parseTag($input, $str, false, false);
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_LITERAL_A:
|
|
// Section 5.8.4.5
|
|
$str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
|
|
$this->parseTag($input, $str, true, false);
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_LITERAL_AC:
|
|
// Section 5.8.4.5
|
|
$str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
|
|
$this->parseTag($input, $string, true, true);
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_LITERAL_C:
|
|
// Section 5.8.4.5
|
|
$str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
|
|
$this->parseTag($input, $str, false, true);
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_OPAQUE:
|
|
// Section 5.8.4.6
|
|
$size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
|
|
// print "opaque of size $size\n"; // @todo remove debug
|
|
$b = substr($input, $this->_strpos, $size);
|
|
#$b = mb_substr($input, $this->_strpos, $size, 'ISO-8859-1');
|
|
$this->_strpos += $size;
|
|
|
|
// opaque data inside a <data> element may or may not be
|
|
// a nested wbxml document (for example devinf data).
|
|
// We find out by checking the first byte of the data: if it's
|
|
// 1, 2 or 3 we expect it to be the version number of a wbxml
|
|
// document and thus start a new wbxml decoder instance on it.
|
|
|
|
if ($this->_isData && ord($b) <= 10) {
|
|
$decoder = &new XML_WBXML_Decoder(true);
|
|
$decoder->setContentHandler($this->_ch);
|
|
$s = $decoder->decode($b);
|
|
// /* // @todo: FIXME currently we can't decode Nokia
|
|
// DevInf data. So ignore error for the time beeing.
|
|
if (is_a($s, 'PEAR_Error')) {
|
|
$this->_error = $s;
|
|
return;
|
|
}
|
|
// */
|
|
// $this->_ch->characters($s);
|
|
} else {
|
|
/* normal opaque behaviour: just copy the raw data: */
|
|
$this->_ch->characters( $b);
|
|
}
|
|
|
|
// old approach to deal with opaque data inside ContentHandler:
|
|
// FIXME Opaque is used by SYNCML. Opaque data that depends on the context
|
|
// if (contentHandler instanceof OpaqueContentHandler) {
|
|
// ((OpaqueContentHandler)contentHandler).opaque(b);
|
|
// } else {
|
|
// String str = new String(b, 0, size, charset);
|
|
// char[] chars = str.toCharArray();
|
|
|
|
// contentHandler.characters(chars, 0, chars.length);
|
|
// }
|
|
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_END:
|
|
// Section 5.8.4.7.1
|
|
$str = $this->endTag();
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
|
|
// Section 5.8.4.7.2
|
|
$codePage = $this->getByte($input);
|
|
// print "switch to codepage $codePage\n"; // @todo: remove debug code
|
|
$this->switchElementCodePage($codePage);
|
|
break;
|
|
|
|
default:
|
|
// Section 5.8.2
|
|
// Section 5.8.3
|
|
$hasAttributes = (($token & 0x80) != 0);
|
|
$hasContent = (($token & 0x40) != 0);
|
|
$realToken = $token & 0x3F;
|
|
$str = $this->getTag($realToken);
|
|
|
|
// print "element:$str\n"; // @TODO Remove debug code
|
|
$this->parseTag($input, $str, $hasAttributes, $hasContent);
|
|
|
|
if ($realToken == 0x0f) {
|
|
// store if we're inside a Data tag. This may contain
|
|
// an additional enclosed wbxml document on which we have
|
|
// to run a seperate encoder
|
|
$this->_isData = true;
|
|
} else {
|
|
$this->_isData = false;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
function parseTag($input, $tag, $hasAttributes, $hasContent)
|
|
{
|
|
$attrs = array();
|
|
if ($hasAttributes) {
|
|
$attrs = $this->getAttributes($input);
|
|
}
|
|
|
|
$this->_ch->startElement($this->getCurrentURI(), $tag, $attrs);
|
|
|
|
if ($hasContent) {
|
|
// FIXME I forgot what does this does. Not sure if this is
|
|
// right?
|
|
$this->_tagStack[] = $tag;
|
|
} else {
|
|
$this->_ch->endElement($this->getCurrentURI(), $tag);
|
|
}
|
|
}
|
|
|
|
function endTag()
|
|
{
|
|
if (count($this->_tagStack)) {
|
|
$tag = array_pop($this->_tagStack);
|
|
} else {
|
|
$tag = 'Unknown';
|
|
}
|
|
|
|
$this->_ch->endElement($this->getCurrentURI(), $tag);
|
|
|
|
return $tag;
|
|
}
|
|
|
|
function getAttributes($input)
|
|
{
|
|
$this->startGetAttributes();
|
|
$hasMoreAttributes = true;
|
|
|
|
$attrs = array();
|
|
$attr = null;
|
|
$value = null;
|
|
$token = null;
|
|
|
|
while ($hasMoreAttributes) {
|
|
$token = $this->getByte($input);
|
|
|
|
switch ($token) {
|
|
// Attribute specified.
|
|
case XML_WBXML_GLOBAL_TOKEN_LITERAL:
|
|
// Section 5.8.4.5
|
|
if (isset($attr)) {
|
|
$attrs[] = array('attribute' => $attr,
|
|
'value' => $value);
|
|
}
|
|
|
|
$attr = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
|
|
break;
|
|
|
|
// Value specified.
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_I_0:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_I_1:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_I_2:
|
|
// Section 5.8.4.2
|
|
$value .= $this->termstr($input);
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_T_0:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_T_1:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_T_2:
|
|
// Section 5.8.4.2
|
|
$value .= $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_0:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_1:
|
|
case XML_WBXML_GLOBAL_TOKEN_EXT_2:
|
|
// Section 5.8.4.2
|
|
$value .= $input[$this->_strpos++];
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_ENTITY:
|
|
// Section 5.8.4.3
|
|
$value .= $this->entity(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_STR_I:
|
|
// Section 5.8.4.1
|
|
$value .= $this->termstr($input);
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_STR_T:
|
|
// Section 5.8.4.1
|
|
$value .= $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_OPAQUE:
|
|
// Section 5.8.4.6
|
|
$size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
|
|
$b = substr($input, $this->_strpos, $this->_strpos + $size);
|
|
$this->_strpos += $size;
|
|
|
|
$value .= $b;
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_END:
|
|
// Section 5.8.4.7.1
|
|
$hasMoreAttributes = false;
|
|
if (isset($attr)) {
|
|
$attrs[] = array('attribute' => $attr,
|
|
'value' => $value);
|
|
}
|
|
break;
|
|
|
|
case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
|
|
// Section 5.8.4.7.2
|
|
$codePage = $this->getByte($input);
|
|
if (!$this->_prevAttributeDTD) {
|
|
$this->_prevAttributeDTD = $this->_attributeDTD;
|
|
}
|
|
|
|
$this->switchAttributeCodePage($codePage);
|
|
break;
|
|
|
|
default:
|
|
if ($token > 128) {
|
|
if (isset($attr)) {
|
|
$attrs[] = array('attribute' => $attr,
|
|
'value' => $value);
|
|
}
|
|
$attr = $this->_attributeDTD->toAttribute($token);
|
|
} else {
|
|
// Value.
|
|
$value .= $this->_attributeDTD->toAttribute($token);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!$this->_prevAttributeDTD) {
|
|
$this->_attributeDTD = $this->_prevAttributeDTD;
|
|
$this->_prevAttributeDTD = false;
|
|
}
|
|
|
|
$this->stopGetAttributes();
|
|
}
|
|
|
|
function startGetAttributes()
|
|
{
|
|
$this->_isAttribute = true;
|
|
}
|
|
|
|
function stopGetAttributes()
|
|
{
|
|
$this->_isAttribute = false;
|
|
}
|
|
|
|
function getCurrentURI()
|
|
{
|
|
if ($this->_isAttribute) {
|
|
return $this->_tagDTD->getURI();
|
|
} else {
|
|
return $this->_attributeDTD->getURI();
|
|
}
|
|
}
|
|
|
|
function writeString($str)
|
|
{
|
|
$this->_ch->characters($str);
|
|
}
|
|
|
|
function getTag($tag)
|
|
{
|
|
// Should know which state it is in.
|
|
return $this->_tagDTD->toTagStr($tag);
|
|
}
|
|
|
|
function getAttribute($attribute)
|
|
{
|
|
// Should know which state it is in.
|
|
$this->_attributeDTD->toAttributeInt($attribute);
|
|
}
|
|
|
|
function switchElementCodePage($codePage)
|
|
{
|
|
$this->_tagDTD = &$this->_dtdManager->getInstance($this->_tagDTD->toCodePageStr($codePage));
|
|
$this->switchAttributeCodePage($codePage);
|
|
}
|
|
|
|
function switchAttributeCodePage($codePage)
|
|
{
|
|
$this->_attributeDTD = &$this->_dtdManager->getInstance($this->_attributeDTD->toCodePageStr($codePage));
|
|
}
|
|
|
|
/**
|
|
* Return the hex version of the base 10 $entity.
|
|
*/
|
|
function entity($entity)
|
|
{
|
|
return dechex($entity);
|
|
}
|
|
|
|
/**
|
|
* Reads a null terminated string.
|
|
*/
|
|
function termstr($input)
|
|
{
|
|
$str = '#'; // must start with nonempty string to allow array access
|
|
$i = 0;
|
|
$ch = $input[$this->_strpos++];
|
|
if (ord($ch) == 0) {
|
|
return ''; // don't return '#'
|
|
}
|
|
while (ord($ch) != 0) {
|
|
$str[$i++] = $ch;
|
|
$ch = $input[$this->_strpos++];
|
|
}
|
|
|
|
return $str;
|
|
}
|
|
|
|
}
|
|
|