2005-11-05 09:51:06 +01:00
< ? php
2008-03-15 16:30:15 +01:00
/**
2012-11-05 13:18:22 +01:00
* EGroupware API - Translations
2009-04-20 13:59:39 +02:00
*
2008-03-15 16:30:15 +01:00
* @ link http :// www . egroupware . org
* @ author Joseph Engo < jengo @ phpgroupware . org >
* @ author Dan Kuykendall < seek3r @ phpgroupware . org >
* Copyright ( C ) 2000 , 2001 Joseph Engo
* @ license http :// opensource . org / licenses / lgpl - license . php LGPL - GNU Lesser General Public License
* @ package api
* @ version $Id $
*/
/**
2012-11-05 13:18:22 +01:00
* EGroupware API - Translations
2009-04-20 13:59:39 +02:00
*
* All methods of this class can now be called static .
*
* Translations are cached tree - wide via egw_cache class .
2012-11-05 13:18:22 +01:00
*
* Translations are no longer stored in database , but load directly from *. lang files into cache .
* Only exception as instance specific translations : mainscreen , loginscreen and custom ( see $instance_specific_translations )
2008-03-15 16:30:15 +01:00
*/
class translation
{
2009-04-20 13:59:39 +02:00
/**
* Language of current user , will be set by init ()
2008-03-15 16:30:15 +01:00
*
2009-04-20 13:59:39 +02:00
* @ var string
2008-03-15 16:30:15 +01:00
*/
2009-04-20 13:59:39 +02:00
static $userlang = 'en' ;
2008-03-15 16:30:15 +01:00
/**
2009-04-20 13:59:39 +02:00
* Already loaded translations by application
2008-03-15 16:30:15 +01:00
*
2009-04-20 13:59:39 +02:00
* @ var array $app => $lang pairs
2008-03-15 16:30:15 +01:00
*/
2009-04-20 13:59:39 +02:00
static $loaded_apps = array ();
2008-03-15 16:30:15 +01:00
/**
2009-04-20 13:59:39 +02:00
* Loaded phrases
*
* @ var array $message_id => $translation pairs
2008-03-15 16:30:15 +01:00
*/
2009-04-20 13:59:39 +02:00
static $lang_arr = array ();
2005-11-05 09:51:06 +01:00
2009-04-20 13:59:39 +02:00
/**
* Tables used by this class
*/
const LANG_TABLE = 'egw_lang' ;
const LANGUAGES_TABLE = 'egw_languages' ;
2005-11-05 09:51:06 +01:00
2009-04-20 13:59:39 +02:00
/**
2012-11-05 13:18:22 +01:00
* Directory for language files
2009-04-20 13:59:39 +02:00
*/
2012-11-05 13:18:22 +01:00
const LANG_DIR = 'lang' ;
2009-04-20 13:59:39 +02:00
/**
2012-11-05 13:18:22 +01:00
* Prefix of language files
2009-04-20 13:59:39 +02:00
*/
2012-11-05 13:18:22 +01:00
const LANGFILE_PREFIX = 'egw_' ;
2009-04-20 13:59:39 +02:00
2012-11-05 13:18:22 +01:00
/**
* Extension of language files
*/
const LANGFILE_EXTENSION = '.lang' ;
2009-04-20 13:59:39 +02:00
/**
* Reference to global db - class
*
* @ var egw_db
*/
static $db ;
/**
* System charset
*
* @ var string
*/
static $system_charset ;
/**
* Is the mbstring extension available
*
* @ var boolean
*/
static $mbstring ;
/**
2014-12-08 20:16:44 +01:00
* Internal encoding / charset of PHP / mbstring ( if loaded )
2009-04-20 13:59:39 +02:00
*
* @ var string
*/
2014-12-08 20:16:44 +01:00
static $default_charset ;
2005-11-05 09:51:06 +01:00
2009-04-28 17:56:04 +02:00
/**
* Application which translations have to be cached instance - and NOT tree - specific
*
* @ var array
*/
2011-11-17 13:43:59 +01:00
static $instance_specific_translations = array ( 'loginscreen' , 'mainscreen' , 'custom' );
2009-04-28 17:56:04 +02:00
2008-03-15 16:30:15 +01:00
/**
 * Returns the charset to use (no $lang given) or the charset stored for the language $lang
 *
 * With $lang: the phrase 'charset' of app 'common' is looked up in the lang table
 * (cached per request), falling back to 'utf-8'.
 *
 * Without $lang: the system-charset is returned if one is set, otherwise the translation
 * of 'charset' from the already loaded phrases, otherwise 'utf-8'. As a side effect the
 * PHP charset ini-setting is updated, if it is set and differs from the result.
 *
 * @param string|boolean $lang =false return charset of the active user-lang, or of $lang if specified
 * @return string charset
 */
static function charset($lang=False)
{
	static $charsets = array();	// lang => charset, filled on demand

	if ($lang)
	{
		if (!isset($charsets[$lang]))
		{
			// method may be called statically before init(), same guard as in load_app()
			if (is_null(self::$db)) self::init(false);

			$stored = self::$db->select(self::LANG_TABLE, 'content', array(
				'lang'       => $lang,
				'message_id' => 'charset',
				'app_name'   => 'common',
			), __LINE__, __FILE__)->fetchColumn();

			$charsets[$lang] = $stored ? $stored : 'utf-8';
		}
		return $charsets[$lang];
	}
	if (self::$system_charset)	// do we have a system-charset ==> return it
	{
		$charset = self::$system_charset;
	}
	else
	{
		// if no translations are loaded (system-startup) use a default, else lang('charset')
		$charset = !self::$lang_arr ? 'utf-8' : strtolower(self::translate('charset'));
	}
	// in case no charset is set (or the phrase 'charset' is untranslated), default to utf-8
	if (empty($charset) || $charset == 'charset') $charset = 'utf-8';

	// we need to set our charset as mbstring.internal_encoding if mbstring.func_overload > 0,
	// else we get problems if the charset is different from the default utf-8
	// (the ini-setting used is mbstring.internal_encoding before PHP 5.6, default_charset otherwise)
	$ini_default_charset = version_compare(PHP_VERSION, '5.6', '<') ? 'mbstring.internal_encoding' : 'default_charset';
	if (ini_get($ini_default_charset) && self::$default_charset != $charset)
	{
		ini_set($ini_default_charset, self::$default_charset = $charset);
	}
	return $charset;
}
2005-11-05 09:51:06 +01:00
2008-03-15 16:30:15 +01:00
/**
 * Initialises the global lang-array and loads the 'common' and app-specific translations
 *
 * Sets self::$db, self::$system_charset and self::$mbstring, takes over a more complete
 * load_via map from the tree-wide cache and resets all loaded phrases.
 *
 * If nothing could be loaded for the language of the user, loading is retried with 'en'.
 *
 * @param boolean $load_translations =true should we also load translations for common and currentapp
 */
static function init($load_translations=true)
{
	if (!isset(self::$db))
	{
		// prefer the db object of setup, if there is one
		self::$db = isset($GLOBALS['egw_setup']) && isset($GLOBALS['egw_setup']->db) ?
			$GLOBALS['egw_setup']->db : $GLOBALS['egw']->db;
	}
	if (!isset($GLOBALS['egw_setup']))
	{
		// server config might not be loaded yet, do not raise a notice in that case
		self::$system_charset = isset($GLOBALS['egw_info']['server']['system_charset']) ?
			$GLOBALS['egw_info']['server']['system_charset'] : null;
	}
	else
	{
		self::$system_charset =& $GLOBALS['egw_setup']->system_charset;
	}
	if ((self::$mbstring = check_load_extension('mbstring')) && !empty(self::$system_charset))
	{
		// ini-setting is mbstring.internal_encoding before PHP 5.6, default_charset otherwise
		$ini_default_charset = version_compare(PHP_VERSION, '5.6', '<') ? 'mbstring.internal_encoding' : 'default_charset';
		ini_set($ini_default_charset, self::$system_charset);
	}
	// try loading load_via from tree-wide cache and check if it contains more rules
	if (($load_via = egw_cache::getTree(__CLASS__, 'load_via')) &&
		$load_via >= self::$load_via &&	// > for array --> contains more elements
		// little sanity check: cached array contains all stock keys, otherwise ignore it
		!array_diff_key(self::$load_via, $load_via))
	{
		self::$load_via = $load_via;
		//error_log(__METHOD__."() load_via set from tree-wide cache to ".array2string(self::$load_via));
	}
	self::$lang_arr = self::$loaded_apps = array();

	if (!$load_translations)
	{
		return;
	}
	// preferences / flags might not be set (eg. login or setup), !empty() avoids notices
	if (!empty($GLOBALS['egw_info']['user']['preferences']['common']['lang']))
	{
		self::$userlang = $GLOBALS['egw_info']['user']['preferences']['common']['lang'];
	}
	$apps = array('common');
	// for eTemplate apps, load etemplate before app itself (allowing app to overwrite etemplate translations)
	if (class_exists('etemplate_new', false) || class_exists('etemplate', false))
	{
		$apps[] = 'etemplate';
	}
	if (!empty($GLOBALS['egw_info']['flags']['currentapp']))
	{
		$apps[] = $GLOBALS['egw_info']['flags']['currentapp'];
	}
	// load instance specific translations last, so they can overwrite everything
	$apps[] = 'custom';
	self::add_app($apps);

	// nothing found for the language of the user --> fall back to English
	if (!count(self::$lang_arr))
	{
		self::$userlang = 'en';
		self::add_app($apps);
	}
}
/**
 * Translates a phrase and optionally substitutes some variables
 *
 * The phrase is looked up as given and, if not found, lower-cased. If there is no
 * translation, the phrase itself is returned with $not_found appended.
 *
 * Placeholders are replaced in a single pass (strtr), so
 * - a value containing something looking like a placeholder (eg. '%2' in an urlencoded
 *   path) is never substituted again and
 * - '%10' is not mistaken for '%1' followed by a '0'.
 * If more than one value is given, placeholders up to %10 without a value are removed.
 * With exactly one value only %1 is replaced.
 *
 * @param string $key phrase to translate, may contain placeholders %N (N=1,2,...,10) for vars
 * @param array $vars =null vars to replace the placeholders, or null for none
 * @param string $not_found ='' what to add to not found phrases, default '' (nothing)
 * @return string with translation
 */
static function translate($key, $vars=null, $not_found='')
{
	if (!self::$lang_arr)
	{
		self::init();
	}
	$ret = $key;	// save key if we dont find a translation
	if ($not_found) $ret .= $not_found;

	if (isset(self::$lang_arr[$key]))
	{
		$ret = self::$lang_arr[$key];
	}
	else
	{
		$new_key = strtolower($key);

		if (isset(self::$lang_arr[$new_key]))
		{
			$ret = self::$lang_arr[$new_key];
		}
	}
	if (is_array($vars) && count($vars))
	{
		// placeholders are positional: do not depend on the keys used by the caller
		$values = array_values($vars);

		if (count($values) > 1)
		{
			$replacements = array();
			for ($n = 1; $n <= 10; ++$n)
			{
				$replacements['%'.$n] = isset($values[$n-1]) ? $values[$n-1] : '';
			}
			// strtr matches the longest placeholder first and never rescans replaced text
			$ret = strtr($ret, $replacements);
		}
		else
		{
			$ret = str_replace('%1', $values[0], $ret);
		}
	}
	return $ret;
}
/**
 * Adds translations for (multiple) application(s)
 *
 * Translations are requested from the cache first (tree-wide, or instance-wide for
 * the apps in self::$instance_specific_translations). What is not cached gets loaded
 * via load_app() or load_app_files() and is stored in the cache afterwards.
 *
 * 'custom' translations are always requested for language 'en'.
 *
 * @param string|array $apps name(s) of application(s) to add (or 'common' for the general translations)
 * 	if multiple names given, they are requested in one request from cache and loaded in given order
 * @param string $lang =null 2 or 5 char lang-code or null/false for the users language
 */
static function add_app($apps, $lang=null)
{
	if (!$lang)
	{
		$lang = self::$userlang;
	}
	// collect what needs loading, and the cache keys to request per cache level
	$to_load = $tree_level = $instance_level = array();
	foreach ((array)$apps as $app)
	{
		// already loaded in the requested language ('common' is never reloaded for an other language)
		if (isset(self::$loaded_apps[$app]) && (self::$loaded_apps[$app] == $lang || $app == 'common'))
		{
			continue;
		}
		$to_load[] = $app;

		if (in_array($app, self::$instance_specific_translations))
		{
			$instance_level[] = $app.':'.($app == 'custom' ? 'en' : $lang);
		}
		else
		{
			$tree_level[] = $app.':'.$lang;
		}
	}
	// load all translations from cache at once
	if ($tree_level)
	{
		$tree_level = egw_cache::getTree(__CLASS__, $tree_level);
	}
	if ($instance_level)
	{
		$instance_level = egw_cache::getInstance(__CLASS__, $instance_level);
	}

	// merging loaded translations together
	$updated_load_via = false;
	foreach ($to_load as $app)
	{
		$app_lang = $app == 'custom' ? 'en' : $lang;
		$cache_key = $app.':'.$app_lang;

		if (isset($tree_level[$cache_key]))
		{
			$phrases = $tree_level[$cache_key];
		}
		elseif (isset($instance_level[$cache_key]))
		{
			$phrases = $instance_level[$cache_key];
		}
		else
		{
			// not cached: read from database (instance specific) or lang-files
			$instance_specific = in_array($app, self::$instance_specific_translations);
			if ($instance_specific)
			{
				$phrases = self::load_app($app, $app_lang);
			}
			else
			{
				$phrases = self::load_app_files($app, $app_lang, null, $updated_load_via);
			}
			// instance specific translations are cached even if empty
			if ($phrases || $instance_specific)
			{
				egw_cache::setCache($instance_specific ? egw_cache::INSTANCE : egw_cache::TREE,
					__CLASS__, $cache_key, $phrases);
			}
		}
		if ($phrases)
		{
			self::$lang_arr = array_merge(self::$lang_arr, $phrases);
			// dont set something not existing to $loaded_apps, no need to load client-side
			self::$loaded_apps[$app] = $app_lang;
		}
	}
	// Re-merge custom over what was just loaded, custom phrases have higher precedence
	if ($tree_level && !$instance_level && self::$instance_specific_translations)
	{
		$custom = egw_cache::getInstance(__CLASS__, 'custom:en');
		if ($custom)
		{
			self::$lang_arr = array_merge(self::$lang_arr, $custom);
		}
	}
	// load_app_files() learned new load_via rules --> store them tree-wide
	if ($updated_load_via)
	{
		self::update_load_via();
	}
}
/**
 * Loads the translations of an application from the database
 *
 * Never use directly, use add_app(), which employs caching (it has to be public, to act as callback for the cache!).
 *
 * @param string $app name of the application to add (or 'common' for the general translations)
 * @param string $lang 2 or 5 char lang-code
 * @return array the loaded strings: lower-cased message_id => content
 */
static function &load_app($app, $lang)
{
	if (is_null(self::$db))
	{
		self::init(false);	// get a db object, without loading any translations
	}
	$where = array(
		'lang'     => $lang,
		'app_name' => $app,
	);
	$phrases = array();
	foreach (self::$db->select(self::LANG_TABLE, 'message_id,content', $where, __LINE__, __FILE__) as $row)
	{
		$phrases[strtolower($row['message_id'])] = $row['content'];
	}
	return $phrases;
}
/**
* How to load translations for a given app
*
* Translations for common , preferences or admin are spread over all applications .
* API has translations for some pseudo - apps .
*
* @ var array app => app ( s ) or string 'all-apps'
*/
static $load_via = array (
'common' => 'all-apps' ,
'preferences' => 'all-apps' ,
'admin' => 'all-apps' ,
2015-04-26 13:00:33 +02:00
'jscalendar' => array ( 'phpgwapi' ),
'sitemgr-link' => array ( 'sitemgr' ),
'groupdav' => array ( 'phpgwapi' ),
'login' => array ( 'phpgwapi' , 'registration' ),
2012-11-05 13:18:22 +01:00
);
/**
 * Check if cached translations are up to date or invalidate cache if not
 *
 * For every app in $GLOBALS['egw_info']['apps'] the modification time of its lang-file
 * (in the language of the user) is compared with the time stored in the tree-wide cache.
 *
 * Called via login.php for each interactive login.
 */
static function check_invalidate_cache()
{
	$lang = $GLOBALS['egw_info']['user']['preferences']['common']['lang'];

	foreach (array_keys($GLOBALS['egw_info']['apps']) as $app)
	{
		$file = self::get_lang_file($app, $lang);
		if (!file_exists($file))
		{
			continue;	// app has no lang-file for that language
		}
		// check if file has changed compared to what's cached
		$cached_time = egw_cache::getTree(__CLASS__, $file);
		if ($cached_time != filemtime($file))
		{
			self::invalidate_lang_file($app, $lang);
		}
	}
}
2005-11-05 09:51:06 +01:00
2008-03-15 16:30:15 +01:00
/**
 * Invalidate cache for lang-file of $app and $lang
 *
 * Beside the translations and file-time of $app itself, the ones of all (pseudo-)apps
 * loaded via $app according to self::$load_via get unset, plus the cached statistics.
 *
 * @param string $app
 * @param string $lang
 */
static function invalidate_lang_file($app, $lang)
{
	// $app itself, followed by everything which gets (also) loaded from $app
	$invalidate = array($app);
	foreach (self::$load_via as $load => $via)
	{
		if ($via === 'all-apps' || in_array($app, (array)$via))
		{
			$invalidate[] = $load;
		}
	}
	foreach ($invalidate as $name)
	{
		egw_cache::unsetTree(__CLASS__, $name.':'.$lang);
		egw_cache::unsetTree(__CLASS__, self::get_lang_file($name, $lang));
	}
	// unset statistics
	egw_cache::unsetTree(__CLASS__, 'statistics');
}
const STATISTIC_CACHE_TIMEOUT = 86400 ;
/**
 * Statistical values about how much a language or app is translated
 *
 * Counted are "valid" phrases: phrases of a language also existing in the English lang-file(s).
 * Without $_lang the numbers are per language over all apps, with $_lang they are per app.
 * Results are cached tree-wide for self::STATISTIC_CACHE_TIMEOUT seconds.
 *
 * @param string $_lang =null
 * @return array $lang or $app => number pairs, highest number first
 */
static function statistics($_lang=null)
{
	$key = (string)$_lang;
	$cache = egw_cache::getTree(__CLASS__, 'statistics');

	if (isset($cache[$key]))
	{
		return $cache[$key];
	}
	$cache[$key] = array();

	if (empty($_lang))
	{
		// per language statistics over all apps
		$en_phrases = array_keys(self::load_app_files(null, 'en', 'all-apps'));
		$cache['']['en'] = count($en_phrases);

		foreach (array_keys(self::get_available_langs()) as $lang)
		{
			if ($lang == 'en') continue;

			$lang_phrases = array_keys(self::load_app_files(null, $lang, 'all-apps'));
			$cache[''][$lang] = count(array_intersect($lang_phrases, $en_phrases));
		}
	}
	else
	{
		// per app statistics for $_lang, English numbers are stored for reference
		$cache['en'] = array();

		foreach (scandir(EGW_SERVER_ROOT) as $app)
		{
			$is_app_with_lang_file = $app[0] != '.' && is_dir(EGW_SERVER_ROOT.'/'.$app) &&
				file_exists(self::get_lang_file($app, 'en'));
			if (!$is_app_with_lang_file) continue;

			$en_phrases = array_keys(self::load_app_files(null, 'en', $app));
			if (count($en_phrases) <= 2) continue;	// ignore apps with (nearly) no phrases

			$cache['en'][$app] = count($en_phrases);

			$lang_phrases = array_keys(self::load_app_files(null, $_lang, $app));
			$cache[$_lang][$app] = count(array_intersect($lang_phrases, $en_phrases));
		}
		asort($cache['en'], SORT_NUMERIC);
		$cache['en'] = array_reverse($cache['en'], true);
	}
	// sort descending by number of phrases, preserving the keys
	asort($cache[$key], SORT_NUMERIC);
	$cache[$key] = array_reverse($cache[$key], true);

	egw_cache::setTree(__CLASS__, 'statistics', $cache, self::STATISTIC_CACHE_TIMEOUT);

	return $cache[$key];
}
2014-01-09 16:32:07 +01:00
/**
 * Get a state / etag for a given app's translations
 *
 * Instance-specific apps use a md5 hash of their cached translations. All other apps
 * currently share a single state: the value of self::max_lang_time().
 *
 * @param string $_app
 * @param string $_lang
 * @return string
 */
static function etag($_app, $_lang)
{
	if (in_array($_app, self::$instance_specific_translations))
	{
		$cached = egw_cache::getCache(egw_cache::INSTANCE, __CLASS__, $_app.':'.$_lang);

		return md5(json_encode($cached));
	}
	// check if cache is NOT invalidated by checking if we have a modification time for concerned lang-file
	$time = egw_cache::getTree(__CLASS__, self::get_lang_file($_app, $_lang));

	// if we dont have one, cache has been invalidated and we need to load translations
	if ($time === null)
	{
		self::add_app($_app, $_lang);
	}
	return self::max_lang_time();
}
2014-02-21 12:10:11 +01:00
/**
 * Get or set maximum / latest modification-time for files of not instance-specific translations
 *
 * The value is kept in the tree-wide cache under 'max_lang_time' and only ever raised:
 * a given $time is stored, if it is bigger than the currently cached value.
 *
 * @param int $time =null modification time to take into account, or null to just read
 * 	(load_app_files() passes the result of filemtime())
 * @return mixed latest known modification time, as stored in the cache
 */
static function max_lang_time($time=null)
{
	static $latest = null;

	// (re-)read the cached value on first use and before every update
	if ($latest === null || $time !== null)
	{
		$latest = egw_cache::getTree(__CLASS__, 'max_lang_time');
	}
	if ($time !== null && $time > $latest)
	{
		$latest = $time;
		egw_cache::setTree(__CLASS__, 'max_lang_time', $latest);
	}
	return $latest;
}
2012-11-05 13:18:22 +01:00
/**
 * Loads translations for an application direct from the lang-file(s)
 *
 * Never use directly, use add_app(), which employs caching (it has to be public, to act as callback for the cache!).
 *
 * Lang-files are expected to contain 4 tab-separated columns per line:
 * message-id, app-name, lang and translation. Lines not matching that get ignored.
 *
 * As a side effect the modification time of every parsed file is stored in the tree-wide
 * cache, and self::$load_via is extended if a file contains phrases of an other app.
 *
 * @param string $app name of the application to add (or 'common' for the general translations)
 * @param string $lang 2 or 5 char lang-code
 * @param string $just_app_file =null if given only that app is loaded ignoring self::$load_via
 * @param boolean &$updated_load_via =false on return true if self::$load_via was updated
 * @return array the loaded strings
 */
static function &load_app_files($app, $lang, $just_app_file=null, &$updated_load_via=false)
{
	$load_app = isset($just_app_file) ? $just_app_file :
		(isset(self::$load_via[$app]) ? self::$load_via[$app] : $app);

	$app_dirs = $load_app === 'all-apps' ? scandir(EGW_SERVER_ROOT) : (array)$load_app;
	if (!is_array($app_dirs)) $app_dirs = array();	// scandir() failed

	$loaded = array();
	foreach ($app_dirs as $app_dir)
	{
		// skipping everything starting with a dot also covers '..': do not try to break out of egw server root
		if ((string)$app_dir === '' || $app_dir[0] == '.' || !is_dir(EGW_SERVER_ROOT.'/'.$app_dir) ||
			!@file_exists($file = self::get_lang_file($app_dir, $lang)) ||
			!($f = fopen($file, 'r')))
		{
			continue;
		}
		// store modification time of file we parse
		$time = filemtime($file);
		egw_cache::setTree(__CLASS__, $file, $time);
		self::max_lang_time($time);

		$line_nr = 0;
		// use fgets and split the line, as php5.3.3 with squeeze does not support splitting lines
		// with fgetcsv properly, if the first letter after the delimiter is a german umlaut (in UTF-8)
		while (($read = fgets($f)) !== false)
		{
			++$line_nr;
			$line = explode("\t", trim($read));
			if (count($line) != 4) continue;

			list($l_id, $l_app, $l_lang, $l_translation) = $line;
			if ($l_lang != $lang) continue;

			if (!isset($just_app_file) && $l_app != $app)
			{
				// check if $l_app contained in file in $app_dir is mentioned in $load_via
				if ($l_app != $app_dir && (!isset(self::$load_via[$l_app]) ||
					!array_intersect((array)self::$load_via[$l_app], array('all-apps', $app_dir))))
				{
					if (!isset(self::$load_via[$l_app]) && !file_exists(EGW_SERVER_ROOT.'/'.$l_app))
					{
						error_log(__METHOD__."() lang file $file contains invalid app '$l_app' on line $line_nr --> ignored");
						continue;
					}
					// if not update load_via accordingly, caller has to store it (see $updated_load_via)
					if (!isset(self::$load_via[$l_app]))
					{
						self::$load_via[$l_app] = array($l_app);
					}
					if (!is_array(self::$load_via[$l_app]))
					{
						self::$load_via[$l_app] = array(self::$load_via[$l_app]);
					}
					self::$load_via[$l_app][] = $app_dir;
					$updated_load_via = true;
				}
				continue;	// phrase belongs to an other app
			}
			$loaded[$l_id] = $l_translation;
		}
		fclose($f);
	}
	return $loaded;
}
2005-11-05 09:51:06 +01:00
2015-04-26 13:00:33 +02:00
/**
 * Update tree-wide stored load_via with our changes
 *
 * Merging in meantime stored changes from other instances to minimize race-conditions.
 */
protected static function update_load_via()
{
	$cached = egw_cache::getTree(__CLASS__, 'load_via');

	// little sanity check: cached array contains all stock keys, otherwise ignore it
	if ($cached && !array_diff_key(self::$load_via, $cached))
	{
		foreach ($cached as $app => $via)
		{
			// cached map can contain apps we do not know (yet)
			$ours = isset(self::$load_via[$app]) ? self::$load_via[$app] : null;

			if ($ours != $via)
			{
				self::$load_via[$app] = array_unique(array_merge((array)$ours, (array)$via));
			}
		}
	}
	egw_cache::setTree(__CLASS__, 'load_via', self::$load_via);
}
2008-03-15 16:30:15 +01:00
/**
* Cached languages
*
* @ var array
*/
2009-04-20 13:59:39 +02:00
static $langs ;
2008-03-15 16:30:15 +01:00
/**
 * Returns a list of available languages / translations
 *
 * The list is read from the tab-separated file setup/lang/languages (lang-code, name)
 * and kept in self::$langs. Therefore $translate only has an effect if the list is
 * actually (re-)read: on first call or with $force_read.
 *
 * @param boolean $translate =true translate language-names
 * @param boolean $force_read =false force a re-read of the languages
 * @return array with lang-code => descriptive lang-name pairs, sorted by name
 * @throws egw_exception if the list of languages can not be opened
 */
static function get_available_langs($translate=true, $force_read=false)
{
	if (!is_array(self::$langs) || $force_read)
	{
		if (!($f = fopen($file = EGW_SERVER_ROOT.'/setup/lang/languages', 'rb')))
		{
			// file-name is put in the message, as 2nd parameter of an exception is its (integer) code
			// NOTE(review): assumes egw_exception keeps the signature of Exception - confirm
			throw new egw_exception("List of available languages ($file) missing!");
		}
		$langs = array();
		// fgetcsv() requires a single character delimiter, length 0 means no line-length limit
		while (is_array($line = fgetcsv($f, 0, "\t")))
		{
			if (!isset($line[1])) continue;	// blank or incomplete line

			$langs[$line[0]] = $line[1];
		}
		fclose($f);

		// set before translating, like the list was previously filled while reading
		self::$langs = $langs;

		if ($translate)
		{
			if (is_null(self::$db)) self::init(false);

			foreach (self::$langs as $lang => $name)
			{
				self::$langs[$lang] = self::translate($name, False, '');
			}
		}
		uasort(self::$langs, 'strcasecmp');
	}
	return self::$langs;
}
2005-11-05 09:51:06 +01:00
2009-04-23 11:47:22 +02:00
/**
 * Returns a list of installed languages / translations
 *
 * Translations no longer need to be installed, therefore all available translations are returned here.
 *
 * @param boolean $force_read =false force a re-read of the languages
 * @return array with lang-code => descriptive lang-name pairs
 */
static function get_installed_langs($force_read=false)
{
	// $force_read is the 2nd parameter of get_available_langs(), first one is $translate (default true)
	return self::get_available_langs(true, $force_read);
}
2008-03-15 16:30:15 +01:00
/**
 * Translates a 2 or 5 char lang-code into a (verbose) language
 *
 * Uses the list of languages already read by get_available_langs(), if it contains $lang,
 * and queries the languages table otherwise.
 *
 * @param string $lang
 * @return string|false language or false if not found
 */
static function lang2language($lang)
{
	if (isset(self::$langs[$lang]))	// no need to query the DB
	{
		return self::$langs[$lang];
	}
	// method may be called statically before init(), same guard as in load_app()
	if (is_null(self::$db)) self::init(false);

	return self::$db->select(self::LANGUAGES_TABLE, 'lang_name', array('lang_id' => $lang), __LINE__, __FILE__)->fetchColumn();
}
2005-11-05 09:51:06 +01:00
2008-03-15 16:30:15 +01:00
/**
 * List all languages, first available ones, then the rest
 *
 * Without $force_read the list is requested from the instance cache, which is given
 * this method (called with $force_read=true) as callback.
 *
 * @param boolean $force_read =false true: read list from lang-file and database
 * @return array with lang_id => lang_name pairs
 */
static function list_langs($force_read=false)
{
	if (!$force_read)
	{
		return egw_cache::getInstance(__CLASS__, 'list_langs', array(__CLASS__, 'list_langs'), array(true));
	}
	// method may be called statically before init(), same guard as in load_app()
	if (is_null(self::$db)) self::init(false);

	$languages = self::get_installed_langs();	// available languages

	// quoted, comma-separated list of the available lang-codes, used twice in the query
	// NOTE(review): lang-codes come from setup/lang/languages and are not escaped here
	$availible = "('".implode("','", array_keys($languages))."')";

	// append the languages from the database, which are not already in the list
	foreach (self::$db->select(self::LANGUAGES_TABLE, array(
		'lang_id', 'lang_name',
		"CASE WHEN lang_id IN $availible THEN 1 ELSE 0 END AS availible",
	), "lang_id NOT IN $availible", __LINE__, __FILE__, false, ' ORDER BY availible DESC,lang_name') as $row)
	{
		$languages[$row['lang_id']] = $row['lang_name'];
	}
	return $languages;
}
2005-11-05 09:51:06 +01:00
2012-11-05 13:18:22 +01:00
/**
 * Build the path of the lang file of an application and language
 *
 * Provides centralization and compatibility to locate the lang files:
 * the app name 'common' is mapped to the 'phpgwapi' directory.
 *
 * @param string $app application name
 * @param string $lang language code
 * @return string the full path of the filename for the requested app and language
 */
static function get_lang_file($app, $lang)
{
	$dir = ($app == 'common') ? 'phpgwapi' : $app;
	$file = self::LANGFILE_PREFIX.$lang.self::LANGFILE_EXTENSION;

	return implode('/', array(EGW_SERVER_ROOT, $dir, self::LANG_DIR, $file));
}
2008-03-15 16:30:15 +01:00
/**
 * Return the list of charsets known to the system
 *
 * The list is built on the first call and kept in a static variable for later calls.
 *
 * @return array with charset as key and the translated description followed by the charset in brackets as value
 */
static function get_installed_charsets()
{
	static $charsets = null;

	// already built by an earlier call
	if (isset($charsets))
	{
		return $charsets;
	}
	$charsets = array(
		'utf-8'        => lang('Unicode').' (utf-8)',
		'iso-8859-1'   => lang('Western european').' (iso-8859-1)',
		'iso-8859-2'   => lang('Eastern european').' (iso-8859-2)',
		'iso-8859-7'   => lang('Greek').' (iso-8859-7)',
		'euc-jp'       => lang('Japanese').' (euc-jp)',
		'euc-kr'       => lang('Korean').' (euc-kr)',
		'koi8-r'       => lang('Russian').' (koi8-r)',
		'windows-1251' => lang('Bulgarian').' (windows-1251)',
		'cp850'        => lang('DOS International').' (CP850)',
	);
	return $charsets;
}
2005-11-05 09:51:06 +01:00
2014-02-25 16:48:23 +01:00
/**
 * Transliterate utf-8 filename to ascii, eg. 'Äpfel' --> 'Aepfel'
 *
 * The string is first converted to html entities (using self::charset()), then the
 * entities are reduced step by step:
 * - entities listed in $extra are replaced by their ascii spelling ('ß' --> 'ss')
 * - german umlauts become the base letter plus 'e'
 * - all other accented letters become their base letter
 * - every remaining entity is removed
 *
 * @param string $_str
 * @return string
 */
static function to_ascii($_str)
{
	// replacements for characters whose entity carries no usable base letter;
	// the entity form is required, because the lookup runs AFTER htmlentities(),
	// the raw character is only kept for input htmlentities() left untouched
	static $extra = array(
		'&szlig;' => 'ss',
		'ß' => 'ss',
	);
	$entities = htmlentities($_str, ENT_QUOTES, self::charset());

	$estr = str_replace(array_keys($extra), array_values($extra), $entities);
	$ustr = preg_replace('/&([aAuUoO])uml;/', '\\1e', $estr);	// replace german umlauts with the letter plus one 'e'
	$astr = preg_replace('/&([a-zA-Z])(grave|acute|circ|ring|cedil|tilde|slash|uml);/', '\\1', $ustr);	// remove all types of accents
	return preg_replace('/&([a-zA-Z]+|#[0-9]+|);/', '', $astr);	// remove all other entities
}
2008-03-15 16:30:15 +01:00
/**
 * Converts a string (or an array of strings) $data from charset $from to charset $to
 *
 * Strategy for a single string, first match wins:
 * - iso-8859-1 <--> utf-8 via utf8_encode() / utf8_decode()
 * - mbstring (if self::$mbstring is set and iconv is not preferred for $from)
 * - iconv, with imap_utf7_* helpers for 'utf7-imap'
 * - otherwise $data is returned unchanged
 *
 * @param string/array $data string(s) to convert
 * @param string/boolean $from charset $data is in or False if it should be detected
 * @param string/boolean $to charset to convert to or False for the system-charset
 * @param boolean $check_to_from =true internal, false bypasses all charset detection and replacements
 * @return string/array converted string(s) from $data
 */
static function convert($data, $from=False, $to=False, $check_to_from=true)
{
	// set by the charset mapping below for charsets iconv handles better than mbstring
	// NOTE(review): not handed on to the recursive calls for array elements (they run with $check_to_from=false)
	$prefer_iconv = false;

	if ($check_to_from)
	{
		if ($from) $from = strtolower($from);
		if ($to) $to = strtolower($to);
		if (!$from)
		{
			$from = self::$mbstring ? strtolower(mb_detect_encoding($data)) : 'iso-8859-1';
			if ($from == 'ascii')
			{
				$from = 'iso-8859-1';
			}
		}
		/*
			php does not seem to support gb2312
			but seems to be able to decode it as EUC-CN
		*/
		switch ($from)
		{
			case 'ks_c_5601-1987':
				$from = 'CP949';
				break;
			case 'gb2312':
			case 'gb18030':
				$from = 'EUC-CN';
				break;
			case 'windows-1252':
			case 'mswin1252':
				if (function_exists('iconv'))
				{
					$prefer_iconv = true;
					break;
				}
				// fall through to remap to iso-8859-1
			case 'us-ascii':
			case 'macroman':
			case 'iso8859-1':
			case 'windows-1258':
				$from = 'iso-8859-1';
				break;
			case 'windows-1250':
				$from = 'iso-8859-2';
				break;
			case 'windows-1257':
				$from = 'iso-8859-13';
				break;
			case 'windows-874':
			case 'tis-620':
			case 'windows-1256':
				$prefer_iconv = true;
				break;
		}
		if (!$to)
		{
			$to = self::charset();
		}
		if ($from == $to || !$from || !$to || !$data)
		{
			return $data;
		}
	}
	if (is_array($data))
	{
		$ret = array();	// an empty (nested) array must come back as array, not as null
		foreach ($data as $key => $str)
		{
			$ret[$key] = self::convert($str, $from, $to, false);	// false = bypass the above checks, as they are already done
		}
		return $ret;
	}
	if ($from == 'iso-8859-1' && $to == 'utf-8')
	{
		return utf8_encode($data);
	}
	if ($to == 'iso-8859-1' && $from == 'utf-8')
	{
		return utf8_decode($data);
	}
	if (self::$mbstring && !$prefer_iconv)
	{
		// keep $data untouched, so iconv below still gets the original input if mbstring fails
		try
		{
			$converted = @mb_convert_encoding($data, $to, $from);
		}
		catch (ValueError $e)	// PHP 8+ throws for charsets unknown to mbstring, iconv might still know them
		{
			$converted = false;
		}
		if ($converted != '')
		{
			return $converted;
		}
	}
	if (function_exists('iconv'))
	{
		// iconv can not convert from/to utf7-imap
		if ($to == 'utf7-imap' && function_exists('imap_utf7_encode'))
		{
			$data_iso = iconv($from, 'iso-8859-1', $data);
			$convertedData = imap_utf7_encode($data_iso);

			return $convertedData;
		}

		if ($from == 'utf7-imap' && function_exists('imap_utf7_decode'))
		{
			$data_iso = imap_utf7_decode($data);
			$convertedData = iconv('iso-8859-1', $to, $data_iso);

			return $convertedData;
		}

		// the following is to workaround patch #962307
		// if using EUC-CN, for iconv it strictly follows GB2312 and fails
		// in an email on the first Traditional/Japanese/Korean character,
		// but in reality when people send mails in GB2312, UMA mostly use
		// extended GB13000/GB18030 which allow T/Jap/Korean characters.
		// compared case-insensitive, as the mapping above sets the upper-case 'EUC-CN'
		if (strtolower($from) == 'euc-cn')
		{
			$from = 'gb18030';
		}

		if (($convertedData = iconv($from, $to, $data)))
		{
			return $convertedData;
		}
	}
	return $data;
}
2007-02-14 12:44:01 +01:00
2015-09-24 12:53:02 +02:00
/**
 * Converts $data from charset $from to the system charset and makes sure json_encode() accepts the result
 *
 * The json_encode() probing only happens if self::charset() is UTF-8. If the converted data can not
 * be encoded, a repair (re-encoding UTF-8 to UTF-8) is tried, and as last resort utf8_encode() is applied.
 *
 * NOTE(review): strlen() is applied to $data below, so array input looks unsupported on the
 * repair path although the parameter is documented as string/array -- confirm against callers.
 *
 * @param string/array $data string(s) to convert
 * @param string/boolean $from charset $data is in or False if it should be detected
 * @return string/array converted string(s) from $data
 */
static function convert_jsonsafe ( $data , $from = False )
{
// no charset given --> detect it from the data itself
if ( $from === false ) $from = self :: detect_encoding ( $data );
$data = self :: convert ( $data , strtolower ( $from ));
// in a way, this tests if we are having real utf-8 (the displayCharset) by now; we should if charsets reported (or detected) are correct
if ( strtoupper ( self :: charset ()) == 'UTF-8' )
{
$test = @ json_encode ( $data );
//error_log(__METHOD__.__LINE__.' ->'.strlen($data).' Error:'.json_last_error().'<- data:#'.$test.'#');
// a null/false result for non-empty data means json_encode() could not handle it
if (( $test == " null " || $test === false || is_null ( $test )) && strlen ( $data ) > 0 )
{
// try to fix broken utf8
$x = ( function_exists ( 'mb_convert_encoding' ) ? mb_convert_encoding ( $data , 'UTF-8' , 'UTF-8' ) : ( function_exists ( 'iconv' ) ? @ iconv ( " UTF-8 " , " UTF-8//IGNORE " , $data ) : $data ));
$test = @ json_encode ( $x );
if (( $test == " null " || $test === false || is_null ( $test )) && strlen ( $data ) > 0 )
{
// this should not be needed, unless something fails with charset detection/ wrong charset passed
error_log ( __METHOD__ . __LINE__ . ' Charset Reported:' . $from . ' Charset Detected:' . translation :: detect_encoding ( $data ));
// last resort: utf8_encode() the (unrepaired) data
$data = utf8_encode ( $data );
}
else
{
// the repaired copy is json-safe, use it
$data = $x ;
}
}
}
return $data ;
}
2008-03-15 16:30:15 +01:00
/**
 * Insert / update / delete one phrase in the lang-table
 *
 * Afterwards the cached translations of the app are invalidated: the tree-wide entry for
 * $app:$lang, or - for apps listed in self::$instance_specific_translations - the instance
 * entries of all installed languages.
 *
 * @param string $lang
 * @param string $app
 * @param string $message_id
 * @param string $content translation or null (or an empty string) to delete the translation
 */
static function write($lang, $app, $message_id, $content)
{
	// columns identifying the phrase
	$key = array(
		'lang'       => $lang,
		'app_name'   => $app,
		'message_id' => $message_id,
	);
	// '0' is a valid translation, although PHP regards it as false
	if ($content || $content === '0')
	{
		self::$db->insert(self::LANG_TABLE, array(
			'content' => $content,
		), $key, __LINE__, __FILE__);
	}
	else
	{
		self::$db->delete(self::LANG_TABLE, $key, __LINE__, __FILE__);
	}
	// invalidate the cache
	if (!in_array($app, self::$instance_specific_translations))
	{
		egw_cache::unsetCache(egw_cache::TREE, __CLASS__, $app.':'.$lang);
	}
	else
	{
		foreach (array_keys((array)self::get_installed_langs()) as $key)
		{
			egw_cache::unsetCache(egw_cache::INSTANCE, __CLASS__, $app.':'.$key);
		}
	}
}
2008-03-15 16:30:15 +01:00
/**
 * Read one phrase from the lang-table
 *
 * @param string $lang
 * @param string $app_name
 * @param string $message_id
 * @return string/boolean content or false if not found
 */
static function read($lang, $app_name, $message_id)
{
	$where = array(
		'lang'       => $lang,
		'app_name'   => $app_name,
		'message_id' => $message_id,
	);
	$rs = self::$db->select(self::LANG_TABLE, 'content', $where, __LINE__, __FILE__);

	return $rs->fetchColumn();
}
2009-04-20 13:59:39 +02:00
2008-03-15 16:30:15 +01:00
/**
 * Return the message_id of a given translation
 *
 * The translation is compared with the database's case-insensitive LIKE operator.
 *
 * @param string $translation
 * @param string $app =null default check all apps
 * @param string $lang =null default check all langs
 * @return string
 */
static function get_message_id($translation, $app=null, $lang=null)
{
	$like = self::$db->capabilities[egw_db::CAPABILITY_CASE_INSENSITIV_LIKE];
	$where = array('content '.$like.' '.self::$db->quote($translation));

	// optional restrictions, only applied if given
	foreach (array('app_name' => $app, 'lang' => $lang) as $column => $value)
	{
		if ($value) $where[$column] = $value;
	}
	$rs = self::$db->select(self::LANG_TABLE, 'message_id', $where, __LINE__, __FILE__);

	return $rs->fetchColumn();
}
2009-04-01 17:20:32 +02:00
2012-11-05 13:18:22 +01:00
/**
 * detect_encoding - try to detect the encoding
 *
 * Only to be used if the string in question has no structure that determines its encoding.
 *
 * With iconv available, the candidate charsets are probed in order ($verify first, if given):
 * a charset matches, if converting the string from that charset to itself returns the string unchanged.
 * If no candidate matches (or iconv is missing) mb_detect_encoding() is used, if self::$mbstring is set.
 *
 * @param string $string to be evaluated
 * @param string $verify =null encoding to verify, gets checked first and is returned for ascii-only strings or if no detection is available
 * @return string encoding, 'iso-8859-1' if nothing could be detected
 */
static function detect_encoding($string, $verify=null)
{
	if (function_exists('iconv'))
	{
		$list = array('utf-8', 'iso-8859-1', 'windows-1251');	// list may be extended
		if ($verify) array_unshift($list, $verify);
		foreach ($list as $item)
		{
			// a failing probe is the expected outcome for non-matching charsets,
			// so do not let iconv report it as notice/warning
			$sample = @iconv($item, $item, $string);
			if ($sample !== false && $sample == $string)
			{
				return $item;
			}
		}
	}
	if (self::$mbstring)
	{
		$detected = strtolower(mb_detect_encoding($string));
	}
	if ($verify && (!isset($detected) || $detected === 'ascii'))
	{
		return $verify;	// ascii matches all charsets
	}
	return isset($detected) ? $detected : 'iso-8859-1';	// we choose to return iso-8859-1 as default
}
2009-04-01 17:20:32 +02:00
/**
 * Return the decoded string meeting some additional requirements for mailheaders
 *
 * The first available decoder is used: imap_mime_header_decode(), mb_decode_mimeheader(),
 * iconv_mime_decode(). In every case (also if no decoder exists) the characters
 * \000-\012, \015, \016, \020-\037 and \075 ('=') are removed from the result.
 *
 * @param string $_string -> part of an mailheader
 * @param string $displayCharset the charset parameter specifies the character set to represent the result by (if iconv_mime_decode is to be used)
 * @return string
 */
static function decodeMailHeader($_string, $displayCharset='utf-8')
{
	//error_log(__FILE__.','.__METHOD__.':'."called with $_string and CHARSET $displayCharset");

	// characters stripped from every result
	$strip = '/([\000-\012\015\016\020-\037\075])/';

	// function_exists() needs the function NAME as string, a bare word is an undefined constant
	if (function_exists('imap_mime_header_decode'))
	{
		// some characterreplacements, as they fail to translate
		$sar = array(
			'@(\x84|\x93|\x94)@',
			'@(\x96|\x97|\x1a)@',
			'@(\x91|\x92)@',
			'@(\x85)@',
			'@(\x86)@',
		);
		$rar = array(
			'"',
			'-',
			'\'',
			'...',
			'+',
		);
		$newString = '';
		// reduce the whitespace between two adjacent encoded words to a single space
		$string = preg_replace('/\?=\s+=\?/', '?= =?', $_string);
		$elements = imap_mime_header_decode($string);

		$convertAtEnd = false;
		foreach ((array)$elements as $element)
		{
			if ($element->charset == 'default') $element->charset = self::detect_encoding($element->text);
			if ($element->charset != 'x-unknown')
			{
				if (strtoupper($element->charset) != 'UTF-8') $element->text = preg_replace($sar, $rar, $element->text);
				// check if there is a possible nested encoding; make sure that the inputstring and the decoded result are different to avoid loops
				if (preg_match('/\?=.+=\?/', $element->text) && $element->text != $_string)
				{
					$element->text = self::decodeMailHeader($element->text, $element->charset);
					$element->charset = $displayCharset;
				}
				$newString .= self::convert($element->text, $element->charset);
			}
			else
			{
				// unknown charset: take the text as is and decode the assembled string once more at the end
				$newString .= $element->text;
				$convertAtEnd = true;
			}
		}
		if ($convertAtEnd) $newString = self::decodeMailHeader($newString, $displayCharset);
		return preg_replace($strip, '', $newString);
	}
	elseif (function_exists('mb_decode_mimeheader'))
	{
		$matches = null;
		if (preg_match_all('/=\?.*\?Q\?.*\?=/iU', $string = $_string, $matches))
		{
			// inside Q-encoded words '_' stands for a space
			foreach ($matches[0] as $match)
			{
				$fixedMatch = str_replace('_', ' ', $match);
				$string = str_replace($match, $fixedMatch, $string);
			}
			// rewrite charset names to the ISO-8859-x spelling
			$string = str_replace('=?ISO8859-', '=?ISO-8859-',
				str_replace('=?windows-1258', '=?ISO-8859-1', $string));
		}
		$string = mb_decode_mimeheader($string);
		return preg_replace($strip, '', $string);
	}
	elseif (function_exists('iconv_mime_decode'))
	{
		// continue decoding also if an error occurs
		$string = @iconv_mime_decode($_string, 2, $displayCharset);
		return preg_replace($strip, '', $string);
	}
	// no decoding function available
	return preg_replace($strip, '', $_string);
}
2009-09-04 15:40:10 +02:00
/**
 * Replace emailaddresses enclosed in <> (eg.: <me@you.de>) with the emailaddress only (e.g: me@you.de)
 * as well as those emailadresses in links, and within broken links
 *
 * CRLF is swapped for the placeholder '<#cr-lf#>' while the regular expressions run and restored afterwards.
 * mailto-links are rewritten by the callback self::transform_mailto2text.
 *
 * @param string &$text the text to process, modified in place
 * @return int always 1, the result is passed back via $text
 */
2009-09-04 15:40:10 +02:00
static function replaceEmailAdresses ( & $text )
{
2009-12-16 11:57:37 +01:00
//error_log($text);
2010-05-06 13:16:57 +02:00
//replace CRLF with something other to be preserved via preg_replace as CRLF seems to vanish
$text = str_replace ( " \r \n " , '<#cr-lf#>' , $text );
2009-09-04 15:40:10 +02:00
// replace emailaddresses eclosed in <> (eg.: <me@you.de>) with the emailaddress only (e.g: me@you.de)
2014-11-11 11:29:58 +01:00
$text = preg_replace ( " /(<|<a href= \" )*(mailto:([ \ w \ .,-.,_.,0-9.]+)(@)([ \ w \ .,-.,_.,0-9.]+))(>|>)*/i " , " $ 2 " , $text );
2013-08-29 12:39:08 +02:00
//$text = preg_replace_callback("/(<|<a href=\")*(mailto:([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))(>|>)*/i",'self::transform_mailto2text',$text);
//$text = preg_replace('~<a[^>]+href=\"(mailto:)+([^"]+)\"[^>]*>~si','$2 ',$text);
// complete mailto-links: the callback receives 'mailto:', the address of the href and the link text as groups 1 to 3
$text = preg_replace_callback ( '~<a[^>]+href=\"(mailto:)+([^"]+)\"[^>]*>([ @\w\.,-.,_.,0-9.]+)<\/a>~si' , 'self::transform_mailto2text' , $text );
2014-11-11 11:29:58 +01:00
// addresses followed by left overs of (broken) links: keep only the address
$text = preg_replace ( " /(([ \ w \ .,-.,_.,0-9.]+)(@)([ \ w \ .,-.,_.,0-9.]+))( | \ s)*(< \ /a>)*( | \ s)*(>|>)*/i " , " $ 1 " , $text );
// addresses with enclosing brackets: keep only the address
$text = preg_replace ( " /(<|<)*(([ \ w \ .,-.,_.,0-9.]+)@([ \ w \ .,-.,_.,0-9.]+))(>|>)*/i " , " $ 2 " , $text );
2010-05-06 13:16:57 +02:00
// restore the CRLF hidden at the top
$text = str_replace ( '<#cr-lf#>' , " \r \n " , $text );
2009-09-04 15:40:10 +02:00
return 1 ;
}
2009-11-18 11:38:15 +01:00
/**
 * Strip tags out of the message completely with their content
 *
 * Three modes:
 * - $endtag == '/>': the tag is treated as singleton and removed by a single regular expression
 * - $addbracesforendtag === true: everything from <$tag ...> to </$endtag> is removed
 * - $addbracesforendtag === false: everything from <$tag ...> to $endtag (used as is) is removed
 * In the two latter modes left over opening tags (and for false also left over $endtag) are removed too.
 * $tag and $endtag are inserted unquoted into regular expressions.
 *
 * @param string &$_body is the text to be processed, modified in place
 * @param string $tag is the tagname which is to be removed. Note, that only the name of the tag is to be passed to the function
 *	without the enclosing brackets
 * @param string $endtag ='' can be different from tag but should be used only, if begin and endtag are known to be different e.g.: <!-- -->
 * @param bool $addbracesforendtag =true if endtag is given, you may decide if the </ and > braces are to be added,
 *	or if you want the string to be matched as is
 * @return void the modified text is passed via reference
 */
static function replaceTagsCompletley(&$_body, $tag, $endtag='', $addbracesforendtag=true)
{
	if ($tag) $tag = strtolower($tag);
	$singleton = ($endtag == '/>');
	// no endtag given --> the tag closes itself
	$endtag = empty($endtag) ? $tag : strtolower($endtag);

	if (!$_body)
	{
		return;	// nothing to strip
	}
	if ($singleton)
	{
		//$_body = preg_replace('~<'.$tag.'[^>].*? '.$endtag.'~simU','',$_body);
		$_body = preg_replace('~<?'.$tag.'[^>].* '.$endtag.'~simU', '', $_body);	// we are in Ungreedy mode, so we expect * to be ungreedy without specifying ?
		return;
	}
	if ($addbracesforendtag === true)
	{
		$closing = '</'.$endtag.'>';
		$leftovers = '~<'.$tag.'[^>]*?>~si';
	}
	elseif ($addbracesforendtag === false)
	{
		$closing = $endtag;
		$leftovers = array('~<'.$tag.'[^>]*?>~si', '~'.$endtag.'~');
	}
	else
	{
		return;	// only strict booleans select a mode
	}
	$found = null;
	$ct = 0;	// stays 0, if the tag does not occur at all
	if (stripos($_body, '<'.$tag) !== false)
	{
		$ct = preg_match_all('#<'.$tag.'(?:\s.*)?>(.+)'.$closing.'#isU', $_body, $found);
	}
	if ($ct > 0)
	{
		//error_log(__METHOD__.__LINE__.array2string($found[0]));
		// only replace what we have found
		$_body = str_ireplace($found[0], '', $_body);
	}
	// remove left over tags, unfinished ones, and so on
	$_body = preg_replace($leftovers, '', $_body);
}
2013-08-29 12:39:08 +02:00
/**
 * Callback for preg_replace_callback: turn a matched mailto-link into plain text
 *
 * Tags are stripped from address and link text and the url-encoded '%40' / '%20' are decoded.
 * If the link text equals the address, only the address is returned, otherwise "address -> text".
 *
 * @param array $matches 1: prefix, 2: address taken from the href, 3: text of the link
 * @return string
 */
static function transform_mailto2text($matches)
{
	//error_log(__METHOD__.__LINE__.array2string($matches));
	$encoded = array('%40', '%20');
	$decoded = array('@', ' ');
	// 2 is the actual url, 3 the text of the link
	foreach (array(2, 3) as $idx)
	{
		$matches[$idx] = str_replace($encoded, $decoded, trim(strip_tags($matches[$idx])));
	}
	$text = $matches[1].$matches[2];

	if ($matches[2] == $matches[3])
	{
		return $text.' ';
	}
	return $text.' -> '.$matches[3].' ';
}
2013-04-25 16:18:49 +02:00
/**
 * Callback for preg_replace_callback: turn a matched link into plain text
 *
 * Spaces in the url are encoded as '%20'. If the (tag-stripped) link text equals the url,
 * only the url is returned, otherwise "[ url -> text ]".
 *
 * @param array $matches 1: url, 2: text of the link
 * @return string
 */
static function transform_url2text($matches)
{
	//error_log(__METHOD__.__LINE__.array2string($matches));
	$url = $matches[1];
	$label = trim(strip_tags($matches[2]));
	// the comparison uses the url as matched, before its spaces get encoded
	$encodedUrl = str_replace(' ', '%20', $url);

	if ($label == $url)
	{
		return ' '.$encodedUrl.' ';
	}
	return '[ '.$encodedUrl.' -> '.$label.' ]';
}
2009-11-18 11:38:15 +01:00
/**
* convertHTMLToText
* @ param string $_html : Text to be stripped down
* @ param string $displayCharset : charset to use ; should be a valid charset
* @ param bool $stripcrl : flag to indicate for the removal of all crlf \r\n
* @ param bool $stripalltags : flag to indicate wether or not to strip $_html from all remaining tags
* @ return text $_html : the modified text .
*/
static function convertHTMLToText ( $_html , $displayCharset = false , $stripcrl = false , $stripalltags = true )
2009-09-04 15:40:10 +02:00
{
2013-02-22 11:58:39 +01:00
// assume input isHTML, but test the input anyway, because,
// if it is not, we may not want to strip whitespace
$isHTML = true ;
if ( strlen ( strip_tags ( $_html )) == strlen ( $_html ))
{
$isHTML = false ;
// return $_html; // maybe we should not proceed at all
}
2009-09-04 15:40:10 +02:00
if ( $displayCharset === false ) $displayCharset = self :: $system_charset ;
2009-11-18 11:38:15 +01:00
//error_log(__METHOD__.$_html);
2009-09-04 15:40:10 +02:00
#print '<hr>';
#print "<pre>"; print htmlspecialchars($_html);
#print "</pre>";
#print "<hr>";
2012-06-04 16:56:45 +02:00
if ( stripos ( $_html , 'style' ) !== false ) self :: replaceTagsCompletley ( $_html , 'style' ); // clean out empty or pagewide style definitions / left over tags
if ( stripos ( $_html , 'head' ) !== false ) self :: replaceTagsCompletley ( $_html , 'head' ); // Strip out stuff in head
if ( stripos ( $_html , '![if' ) !== false && stripos ( $_html , '<![endif]>' ) !== false ) self :: replaceTagsCompletley ( $_html , '!\[if' , '<!\[endif\]>' , false ); // Strip out stuff in ifs
if ( stripos ( $_html , '!--[if' ) !== false && stripos ( $_html , '<![endif]-->' ) !== false ) self :: replaceTagsCompletley ( $_html , '!--\[if' , '<!\[endif\]-->' , false ); // Strip out stuff in ifs
2010-01-19 14:29:16 +01:00
$Rules = array ( '@<script[^>]*?>.*?</script>@siU' , // Strip out javascript
2009-09-04 15:40:10 +02:00
'@&(quot|#34);@i' , // Replace HTML entities
'@&(amp|#38);@i' , // Ampersand &
'@&(lt|#60);@i' , // Less Than <
'@&(gt|#62);@i' , // Greater Than >
'@&(nbsp|#160);@i' , // Non Breaking Space
'@&(iexcl|#161);@i' , // Inverted Exclamation point
'@&(cent|#162);@i' , // Cent
'@&(pound|#163);@i' , // Pound
'@&(copy|#169);@i' , // Copyright
'@&(reg|#174);@i' , // Registered
2010-09-15 10:39:07 +02:00
'@&(trade|#8482);@i' , // trade
2010-08-31 16:23:58 +02:00
'@'@i' , // singleQuote
2010-09-15 10:39:07 +02:00
'@(\xc2\xa0)@' , // nbsp or tab (encoded windows-style)
2013-05-07 13:38:44 +02:00
'@(\xe2\x80\x8b)@' , // ZERO WIDTH SPACE
2009-09-04 15:40:10 +02:00
);
$Replace = array ( '' ,
'"' ,
2010-09-09 12:19:23 +02:00
'#amper#sand#' ,
2009-09-04 15:40:10 +02:00
'<' ,
'>' ,
' ' ,
chr ( 161 ),
chr ( 162 ),
chr ( 163 ),
2010-09-15 10:39:07 +02:00
'(C)' , //chr(169),// copyrighgt
'(R)' , //chr(174),// registered
'(TM)' , // trade
2010-08-31 16:23:58 +02:00
" ' " ,
2010-09-15 10:39:07 +02:00
' ' ,
2013-05-07 13:38:44 +02:00
'' ,
2009-09-04 15:40:10 +02:00
);
$_html = preg_replace ( $Rules , $Replace , $_html );
2009-11-18 11:38:15 +01:00
2010-08-31 16:23:58 +02:00
// removing carriage return linefeeds, preserve those enclosed in <pre> </pre> tags
2011-04-09 11:45:41 +02:00
if ( $stripcrl === true )
2010-08-31 16:23:58 +02:00
{
2014-10-10 12:10:36 +02:00
if ( stripos ( $_html , '<pre ' ) !== false || stripos ( $_html , '<pre>' ) !== false )
2010-08-31 16:23:58 +02:00
{
$contentArr = html :: splithtmlByPRE ( $_html );
foreach ( $contentArr as $k =>& $elem )
{
2014-10-10 12:10:36 +02:00
if ( stripos ( $elem , '<pre ' ) === false && stripos ( $elem , '<pre>' ) === false )
2010-08-31 16:23:58 +02:00
{
2011-05-05 10:59:54 +02:00
//$elem = str_replace('@(\r\n)@i',' ',$elem);
2013-09-18 14:14:59 +02:00
$elem = str_replace ( array ( " \r \n " , " \n " ),( $isHTML ? '' : ' ' ), $elem );
2010-08-31 16:23:58 +02:00
}
}
$_html = implode ( '' , $contentArr );
}
2011-05-05 10:59:54 +02:00
else
{
2013-09-18 14:14:59 +02:00
$_html = str_replace ( array ( " \r \n " , " \n " ),( $isHTML ? '' : ' ' ), $_html );
2011-05-05 10:59:54 +02:00
}
2010-08-31 16:23:58 +02:00
}
2009-09-04 15:40:10 +02:00
$tags = array (
0 => '~<h[123][^>]*>\r*\n*~si' ,
1 => '~<h[456][^>]*>\r*\n*~si' ,
2 => '~<table[^>]*>\r*\n*~si' ,
3 => '~<tr[^>]*>\r*\n*~si' ,
4 => '~<li[^>]*>\r*\n*~si' ,
5 => '~<br[^>]*>\r*\n*~si' ,
6 => '~<br[^>]*>~si' ,
7 => '~<p[^>]*>\r*\n*~si' ,
8 => '~<div[^>]*>\r*\n*~si' ,
9 => '~<hr[^>]*>\r*\n*~si' ,
10 => '/<blockquote type="cite">/' ,
2010-08-31 16:23:58 +02:00
11 => '/<blockquote>/' ,
12 => '~</blockquote>~si' ,
13 => '~<blockquote[^>]*>~si' ,
2009-09-04 15:40:10 +02:00
);
$Replace = array (
0 => " \r \n " ,
1 => " \r \n " ,
2 => " \r \n " ,
3 => " \r \n " ,
4 => " \r \n " ,
5 => " \r \n " ,
6 => " \r \n " ,
7 => " \r \n " ,
8 => " \r \n " ,
9 => " \r \n __________________________________________________ \r \n " ,
10 => '#blockquote#type#cite#' ,
2010-08-31 16:23:58 +02:00
11 => '#blockquote#type#cite#' ,
12 => '#blockquote#end#cite#' ,
13 => '#blockquote#type#cite#' ,
2009-09-04 15:40:10 +02:00
);
$_html = preg_replace ( $tags , $Replace , $_html );
$_html = preg_replace ( '~</t(d|h)>\s*<t(d|h)[^>]*>~si' , ' - ' , $_html );
$_html = preg_replace ( '~<img[^>]+>~s' , '' , $_html );
2009-11-18 11:38:15 +01:00
// replace emailaddresses eclosed in <> (eg.: <me@you.de>) with the emailaddress only (e.g: me@you.de)
self :: replaceEmailAdresses ( $_html );
2009-09-04 15:40:10 +02:00
//convert hrefs to description -> URL
2013-04-25 16:18:49 +02:00
//$_html = preg_replace('~<a[^>]+href=\"([^"]+)\"[^>]*>(.*)</a>~si','[$2 -> $1]',$_html);
2014-02-25 16:48:23 +01:00
$_html = preg_replace_callback ( '~<a[^>]+href=\"([^"]+)\"[^>]*>(.*?)</a>~si' , 'self::transform_url2text' , $_html );
2011-05-10 17:32:44 +02:00
// reducing double \r\n to single ones, dont mess with pre sections
2013-02-22 11:58:39 +01:00
if ( $stripcrl === true && $isHTML )
2011-05-10 17:32:44 +02:00
{
2014-10-10 12:10:36 +02:00
if ( stripos ( $_html , '<pre ' ) !== false || stripos ( $_html , '<pre>' ) !== false )
2011-05-10 17:32:44 +02:00
{
$contentArr = html :: splithtmlByPRE ( $_html );
foreach ( $contentArr as $k =>& $elem )
{
2014-10-10 12:10:36 +02:00
if ( stripos ( $elem , '<pre ' ) === false && stripos ( $elem , '<pre>' ) === false )
2011-05-10 17:32:44 +02:00
{
//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
if ( $stripalltags ) {
$_html = preg_replace ( '~<[^>^@]+>~s' , '' , $_html );
}
// strip out whitespace inbetween CR/LF
$elem = preg_replace ( '~\r\n\s+\r\n~si' , " \r \n \r \n " , $elem );
2011-08-04 09:27:23 +02:00
// strip out / reduce exess CR/LF
2011-05-10 17:32:44 +02:00
$elem = preg_replace ( '~\r\n{3,}~si' , " \r \n \r \n " , $elem );
}
}
$_html = implode ( '' , $contentArr );
}
else
{
//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
if ( $stripalltags ) {
$_html = preg_replace ( '~<[^>^@]+>~s' , '' , $_html );
}
// strip out whitespace inbetween CR/LF
$_html = preg_replace ( '~\r\n\s+\r\n~si' , " \r \n \r \n " , $_html );
2011-08-04 09:27:23 +02:00
// strip out / reduce exess CR/LF
2011-05-10 17:32:44 +02:00
$_html = preg_replace ( '~(\r\n){3,}~si' , " \r \n \r \n " , $_html );
}
}
2009-11-18 11:38:15 +01:00
//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
if ( $stripalltags ) {
2011-05-11 12:39:55 +02:00
$_html = preg_replace ( '~<[^>^@]+>~s' , '' , $_html );
2009-11-18 11:38:15 +01:00
//$_html = strip_tags($_html, '<a>');
}
2013-02-22 11:58:39 +01:00
// reducing spaces (not for input that was plain text from the beginning)
if ( $isHTML ) $_html = preg_replace ( '~ +~s' , ' ' , $_html );
2010-09-09 12:19:23 +02:00
// restoring ampersands
$_html = str_replace ( '#amper#sand#' , '&' , $_html );
2011-05-05 10:59:54 +02:00
//error_log(__METHOD__.__LINE__.' Charset:'.$displayCharset.' -> '.$_html);
2009-09-04 15:40:10 +02:00
$_html = html_entity_decode ( $_html , ENT_COMPAT , $displayCharset );
2011-05-05 10:59:54 +02:00
//error_log(__METHOD__.__LINE__.' Charset:'.$displayCharset.' After html_entity_decode: -> '.$_html);
2009-11-18 11:38:15 +01:00
//self::replaceEmailAdresses($_html);
2009-09-04 15:40:10 +02:00
$pos = strpos ( $_html , 'blockquote' );
2010-09-09 12:19:23 +02:00
//error_log("convert HTML2Text: $_html");
2009-09-04 15:40:10 +02:00
if ( $pos === false ) {
return $_html ;
} else {
$indent = 0 ;
$indentString = '' ;
2010-08-31 16:23:58 +02:00
$quoteParts = preg_split ( '/#blockquote#type#cite#/' , $_html , - 1 , PREG_SPLIT_OFFSET_CAPTURE );
2009-09-04 15:40:10 +02:00
foreach ( $quoteParts as $quotePart ) {
if ( $quotePart [ 1 ] > 0 ) {
$indent ++ ;
$indentString .= '>' ;
}
2010-08-31 16:23:58 +02:00
$quoteParts2 = preg_split ( '/#blockquote#end#cite#/' , $quotePart [ 0 ], - 1 , PREG_SPLIT_OFFSET_CAPTURE );
2009-09-04 15:40:10 +02:00
foreach ( $quoteParts2 as $quotePart2 ) {
if ( $quotePart2 [ 1 ] > 0 ) {
$indent -- ;
$indentString = substr ( $indentString , 0 , $indent );
}
$quoteParts3 = explode ( " \r \n " , $quotePart2 [ 0 ]);
foreach ( $quoteParts3 as $quotePart3 ) {
2010-09-09 12:19:23 +02:00
//error_log(__METHOD__.__LINE__.'Line:'.$quotePart3);
2009-09-04 15:40:10 +02:00
$allowedLength = 76 - strlen ( " \r \n $indentString " );
2009-11-18 11:38:15 +01:00
// only break lines, if not already indented
2010-09-09 12:19:23 +02:00
if ( substr ( $quotePart3 , 0 , strlen ( $indentString )) != $indentString )
2009-11-18 11:38:15 +01:00
{
if ( strlen ( $quotePart3 ) > $allowedLength ) {
$s = explode ( " " , $quotePart3 );
$quotePart3 = " " ;
$linecnt = 0 ;
foreach ( $s as $k => $v ) {
$cnt = strlen ( $v );
// only break long words within the word boundaries,
// but that may destroy links, so we check for href and don't do it if we find it
if ( $cnt > $allowedLength && stripos ( $v , 'href=' ) === false ) {
2010-09-09 12:19:23 +02:00
//error_log(__METHOD__.__LINE__.'LongWordFound:'.$v);
2009-11-18 11:38:15 +01:00
$v = wordwrap ( $v , $allowedLength , " \r \n $indentString " , true );
}
// the rest should be broken at the start of the new word that exceeds the limit
if ( $linecnt + $cnt > $allowedLength ) {
$v = " \r \n $indentString $v " ;
2010-09-09 12:19:23 +02:00
//error_log(__METHOD__.__LINE__.'breaking here:'.$v);
2009-11-18 11:38:15 +01:00
$linecnt = 0 ;
} else {
$linecnt += $cnt ;
}
if ( strlen ( $v )) $quotePart3 .= ( strlen ( $quotePart3 ) ? " " : " " ) . $v ;
2009-09-04 15:40:10 +02:00
}
}
}
2010-09-09 12:19:23 +02:00
//error_log(__METHOD__.__LINE__.'partString to return:'.$indentString . $quotePart3);
2009-09-04 15:40:10 +02:00
$asciiTextBuff [] = $indentString . $quotePart3 ;
}
}
}
return implode ( " \r \n " , $asciiTextBuff );
}
}
2008-03-15 16:30:15 +01:00
}