2005-11-05 09:51:06 +01:00
< ? php
2008-03-15 16:30:15 +01:00
/**
2012-11-05 13:18:22 +01:00
* EGroupware API - Translations
2009-04-20 13:59:39 +02:00
*
2008-03-15 16:30:15 +01:00
* @ link http :// www . egroupware . org
* @ author Joseph Engo < jengo @ phpgroupware . org >
* @ author Dan Kuykendall < seek3r @ phpgroupware . org >
* Copyright ( C ) 2000 , 2001 Joseph Engo
* @ license http :// opensource . org / licenses / lgpl - license . php LGPL - GNU Lesser General Public License
* @ package api
* @ version $Id $
*/
/**
2012-11-05 13:18:22 +01:00
* EGroupware API - Translations
2009-04-20 13:59:39 +02:00
*
* All methods of this class can now be called static .
*
* Translations are cached tree - wide via egw_cache class .
2012-11-05 13:18:22 +01:00
*
* Translations are no longer stored in database , but load directly from *. lang files into cache .
* Only exception as instance specific translations : mainscreen , loginscreen and custom ( see $instance_specific_translations )
2008-03-15 16:30:15 +01:00
*/
class translation
{
2009-04-20 13:59:39 +02:00
/**
* Language of current user , will be set by init ()
2008-03-15 16:30:15 +01:00
*
2009-04-20 13:59:39 +02:00
* @ var string
2008-03-15 16:30:15 +01:00
*/
2009-04-20 13:59:39 +02:00
static $userlang = 'en' ;
2008-03-15 16:30:15 +01:00
/**
2009-04-20 13:59:39 +02:00
* Already loaded translations by application
2008-03-15 16:30:15 +01:00
*
2009-04-20 13:59:39 +02:00
* @ var array $app => $lang pairs
2008-03-15 16:30:15 +01:00
*/
2009-04-20 13:59:39 +02:00
static $loaded_apps = array ();
2008-03-15 16:30:15 +01:00
/**
2009-04-20 13:59:39 +02:00
* Loaded phrases
*
* @ var array $message_id => $translation pairs
2008-03-15 16:30:15 +01:00
*/
2009-04-20 13:59:39 +02:00
static $lang_arr = array ();
2005-11-05 09:51:06 +01:00
2009-04-20 13:59:39 +02:00
/**
* Tables used by this class
*/
const LANG_TABLE = 'egw_lang' ;
const LANGUAGES_TABLE = 'egw_languages' ;
2005-11-05 09:51:06 +01:00
2009-04-20 13:59:39 +02:00
/**
2012-11-05 13:18:22 +01:00
* Directory for language files
2009-04-20 13:59:39 +02:00
*/
2012-11-05 13:18:22 +01:00
const LANG_DIR = 'lang' ;
2009-04-20 13:59:39 +02:00
/**
2012-11-05 13:18:22 +01:00
* Prefix of language files
2009-04-20 13:59:39 +02:00
*/
2012-11-05 13:18:22 +01:00
const LANGFILE_PREFIX = 'egw_' ;
2009-04-20 13:59:39 +02:00
2012-11-05 13:18:22 +01:00
/**
* Extension of language files
*/
const LANGFILE_EXTENSION = '.lang' ;
2009-04-20 13:59:39 +02:00
/**
* Reference to global db - class
*
* @ var egw_db
*/
static $db ;
/**
* Mark untranslated strings with an asterisk ( * )
*
* @ var boolean
*/
static $markuntranslated = false ;
/**
* System charset
*
* @ var string
*/
static $system_charset ;
/**
* Is the mbstring extension available
*
* @ var boolean
*/
static $mbstring ;
/**
* Internal encoding / charset of mbstring ( if loaded )
*
* @ var string
*/
static $mbstring_internal_encoding ;
2005-11-05 09:51:06 +01:00
2009-04-28 17:56:04 +02:00
/**
* Application which translations have to be cached instance - and NOT tree - specific
*
* @ var array
*/
2011-11-17 13:43:59 +01:00
static $instance_specific_translations = array ( 'loginscreen' , 'mainscreen' , 'custom' );
2009-04-28 17:56:04 +02:00
2008-03-15 16:30:15 +01:00
/**
 * Return the charset to use
 *
 * With a $lang the charset phrase of that language is looked up in the lang table
 * (app 'common', message_id 'charset') and remembered for the rest of the request.
 * Without $lang the system charset is returned, or - if none is configured - the
 * translation of 'charset' from the loaded phrases. 'utf-8' is the fallback in every case.
 *
 * @param string|boolean $lang=False language to return the charset for, or False for the active charset
 * @return string charset
 */
static function charset($lang=False)
{
	static $charsets = array();

	if ($lang)
	{
		if (isset($charsets[$lang]))
		{
			return $charsets[$lang];
		}
		$from_db = self::$db->select(self::LANG_TABLE, 'content', array(
			'lang'       => $lang,
			'message_id' => 'charset',
			'app_name'   => 'common',
		), __LINE__, __FILE__)->fetchColumn();

		return $charsets[$lang] = $from_db ? $from_db : 'utf-8';
	}

	if (self::$system_charset)	// a configured system charset always wins
	{
		$charset = self::$system_charset;
	}
	elseif (self::$lang_arr)	// translations are loaded --> use lang('charset')
	{
		$charset = strtolower(self::translate('charset'));
	}
	else	// nothing loaded yet (system startup)
	{
		$charset = 'utf-8';
	}
	// an empty or untranslated 'charset' phrase means no charset is set
	if (empty($charset) || $charset == 'charset')
	{
		$charset = 'utf-8';
	}
	// with mbstring.func_overload > 0 our charset has to be the mbstring internal encoding,
	// otherwise a charset different from the default utf-8 causes problems
	$overloaded = ini_get('mbstring.func_overload');
	if ($overloaded && self::$mbstring_internal_encoding != $charset)
	{
		self::$mbstring_internal_encoding = $charset;
		ini_set('mbstring.internal_encoding', $charset);
	}
	return $charset;
}
2005-11-05 09:51:06 +01:00
2008-03-15 16:30:15 +01:00
/**
 * Initialise the static state of the class and optionally load the translations
 *
 * Sets the db object, the system charset, the mbstring flag, the markuntranslated flag
 * and a configured $load_via map. With $load_translations the phrase array is reset and
 * 'common', the current application and the instance specific 'custom' phrases are loaded.
 *
 * @param boolean $load_translations=true should we also load translations for common and currentapp
 */
static function init($load_translations=true)
{
	$in_setup = isset($GLOBALS['egw_setup']);

	if (!isset(self::$db))
	{
		if ($in_setup && isset($GLOBALS['egw_setup']->db))
		{
			self::$db = $GLOBALS['egw_setup']->db;
		}
		else
		{
			self::$db = $GLOBALS['egw']->db;
		}
	}

	if ($in_setup)
	{
		// bound by reference to the setup object's charset
		self::$system_charset =& $GLOBALS['egw_setup']->system_charset;
	}
	else
	{
		self::$system_charset = $GLOBALS['egw_info']['server']['system_charset'];
	}

	self::$mbstring = check_load_extension('mbstring');
	if (self::$mbstring && !empty(self::$system_charset))
	{
		ini_set('mbstring.internal_encoding', self::$system_charset);
	}

	self::$markuntranslated = (bool)$GLOBALS['egw_info']['server']['markuntranslated'];

	// a stored load_via map replaces the built-in default
	if (isset($GLOBALS['egw_info']['server']['translation_load_via']))
	{
		self::$load_via = $GLOBALS['egw_info']['server']['translation_load_via'];
	}

	if (!$load_translations)
	{
		return;
	}

	self::$lang_arr = self::$loaded_apps = array();

	$preferred = $GLOBALS['egw_info']['user']['preferences']['common']['lang'];
	if ($preferred)
	{
		self::$userlang = $preferred;
	}
	self::add_app('common');

	// nothing found for the users language --> fall back to english
	if (!count(self::$lang_arr))
	{
		self::$userlang = 'en';
		self::add_app('common');
	}
	self::add_app($GLOBALS['egw_info']['flags']['currentapp']);

	// load instance specific translations
	self::add_app('custom');
}
/**
 * Translate a phrase and optionally substitute placeholders
 *
 * The phrase is looked up as given and, if not found, lower-cased. An untranslated phrase
 * is returned unchanged, with $not_found appended if marking untranslated phrases is enabled.
 *
 * Placeholders %1 ... %10 are replaced in a single pass: longer placeholders win (so %10 is
 * not mistaken for %1 followed by '0') and inserted values are never scanned again, so a
 * value containing eg. '%2' (like an url-encoded path) is left untouched.
 * If more than one var is given, placeholders without a matching var are removed.
 *
 * @param string $key phrase to translate, may contain placeholders %N (N=1,2,...) for vars
 * @param array|boolean $vars=false vars to replace the placeholders, or false for none
 * @param string $not_found='*' what to add to not found phrases, default '*'
 * @return string with translation
 */
static function translate($key, $vars=false, $not_found='*')
{
	if (!self::$lang_arr)
	{
		self::init();
	}
	$ret = $key;	// keep the key, if we do not find a translation
	if ($not_found && self::$markuntranslated) $ret .= $not_found;

	if (isset(self::$lang_arr[$key]))
	{
		$ret = self::$lang_arr[$key];
	}
	else
	{
		$new_key = strtolower($key);

		if (isset(self::$lang_arr[$new_key]))
		{
			$ret = self::$lang_arr[$new_key];
		}
	}
	if (is_array($vars) && count($vars))
	{
		$values = array_values($vars);	// placeholders are positional, ignore the keys

		if (count($values) > 1)
		{
			$pairs = array();
			for($n = 1; $n <= 10; ++$n)
			{
				// placeholders without a value are removed, as before
				$pairs['%'.$n] = isset($values[$n-1]) ? $values[$n-1] : '';
			}
			// strtr prefers the longest key and does not re-scan replaced text
			$ret = strtr($ret, $pairs);
		}
		else
		{
			$ret = str_replace('%1', $values[0], $ret);
		}
	}
	return $ret;
}
/**
 * Add the translations of one or more applications to the loaded phrases
 *
 * Each application is read from the cache first: instance specific applications
 * (see $instance_specific_translations) from the instance cache, all others from the tree cache.
 * On a cache miss the phrases are loaded via load_app() (instance specific) or
 * load_app_files() and stored in the cache.
 *
 * Every given application is loaded, in the given order; an application already loaded
 * for the requested language is skipped.
 *
 * @param string|array $apps name(s) of application(s) to add (or 'common' for the general translations)
 * @param string $lang=null 2 or 5 char lang-code or null/false for the users language
 */
static function add_app($apps, $lang=null)
{
	$requested_lang = $lang ? $lang : self::$userlang;

	foreach((array)$apps as $app)
	{
		// custom translations use only 'en'; decided per app so it does not leak to other apps
		$app_lang = $app == 'custom' ? 'en' : $requested_lang;

		if (isset(self::$loaded_apps[$app]) && self::$loaded_apps[$app] == $app_lang)
		{
			continue;	// already loaded for that language
		}
		// for loginscreen & co. we have to use an instance specific cache!
		$instance_specific = in_array($app, self::$instance_specific_translations);
		$level = $instance_specific ? egw_cache::INSTANCE : egw_cache::TREE;
		$location = $app.':'.$app_lang;

		$loaded = egw_cache::getCache($level, __CLASS__, $location);

		// do NOT use automatic callback to cache result, as installing languages in setup can create
		// a racecondition, therefore only cache existing non-instance-specific translations,
		// never cache nothing found === array(), instance-specific translations can and should always be cached!
		if (!$loaded && (!$instance_specific || is_null($loaded)))
		{
			if ($instance_specific)
			{
				$loaded = self::load_app($app, $app_lang);
			}
			else
			{
				$loaded = self::load_app_files($app, $app_lang);
			}
			if ($loaded || $instance_specific)
			{
				egw_cache::setCache($level, __CLASS__, $location, $loaded);
			}
		}
		// we have to use array_merge! (+= does not overwrite common translations with different ones in an app)
		// array_merge messes up translations of numbers, which make no sense and should be avoided anyway.
		if ($loaded) self::$lang_arr = array_merge(self::$lang_arr, $loaded);

		self::$loaded_apps[$app] = $app_lang;
	}
}
/**
 * Load the translations of an application from the lang table of the database
 *
 * Meant to be called through add_app(), which adds the caching.
 * Initialises the class (without loading translations) if no db object is set yet.
 *
 * @param string $app name of the application to add (or 'common' for the general translations)
 * @param string $lang 2 or 5 char lang-code
 * @return array lower-cased message_id => translation pairs
 */
static function &load_app($app, $lang)
{
	if (is_null(self::$db))
	{
		self::init(false);
	}
	$rows = self::$db->select(self::LANG_TABLE, 'message_id,content', array(
		'lang'     => $lang,
		'app_name' => $app,
	), __LINE__, __FILE__);

	$phrases = array();
	foreach($rows as $row)
	{
		$message_id = strtolower($row['message_id']);
		$phrases[$message_id] = $row['content'];
	}
	return $phrases;
}
/**
* How to load translations for a given app
*
* Translations for common, preferences or admin are spread over all applications.
* API has translations for some pseudo - apps .
*
* @ var array app => app ( s ) or string 'all-apps'
*/
static $load_via = array (
'common' => 'all-apps' ,
'preferences' => 'all-apps' ,
'admin' => 'all-apps' ,
'jscalendar' => 'phpgwapi' ,
'sitemgr-link' => 'sitemgr' ,
'groupdav' => 'phpgwapi' ,
'login' => 'phpgwapi' ,
);
/**
 * Invalidate cached translations whose lang-file changed since it was parsed
 *
 * Compares the ctime stored in the tree cache by load_app_files() with the current ctime
 * of the lang-file of every application of the user plus the API, for the users language.
 */
static function check_invalidate_cache()
{
	$lang = $GLOBALS['egw_info']['user']['preferences']['common']['lang'];

	// all applications of the user, and the api too
	$apps = array_merge(array_keys($GLOBALS['egw_info']['user']['apps']), array('phpgwapi'));

	foreach($apps as $app)
	{
		$file = self::get_lang_file($app, $lang);

		if (!file_exists($file))
		{
			continue;
		}
		// unchanged compared to what is cached
		if (egw_cache::getTree(__CLASS__, $file) == filectime($file))
		{
			continue;
		}
		self::invalidate_lang_file($app, $lang);
	}
}
2005-11-05 09:51:06 +01:00
2008-03-15 16:30:15 +01:00
/**
 * Invalidate the tree cache for the lang-file of $app and $lang
 *
 * Besides the translations of $app itself, the translations of every application
 * which - according to $load_via - is loaded from all applications or from $app are unset too.
 *
 * Entries of $load_via are either a string or an array of applications
 * (load_app_files() turns them into arrays), both forms are handled.
 *
 * @param string $app
 * @param string $lang
 */
static function invalidate_lang_file($app, $lang)
{
	egw_cache::unsetTree(__CLASS__, $app.':'.$lang);

	foreach(self::$load_via as $load => $via)
	{
		// same test as in load_app_files(): $via can be a single name or a list of names
		if (array_intersect((array)$via, array('all-apps', $app)))
		{
			egw_cache::unsetTree(__CLASS__, $load.':'.$lang);
		}
	}
}
/**
 * Load the translations of an application directly from the lang-file(s)
 *
 * Meant to be called through add_app(), which adds the caching.
 *
 * Which directories are searched is taken from $load_via: either every directory below
 * EGW_SERVER_ROOT ('all-apps'), the listed application(s), or the application itself.
 * For every parsed file its ctime is stored in the tree cache, see check_invalidate_cache().
 *
 * Lang-files are tab separated with 4 columns: message_id, app, lang, translation.
 * Lines for an other application than $app are skipped; if that application is not yet
 * known to be found in the parsed directory, $load_via is extended and stored as config.
 *
 * @param string $app name of the application to add (or 'common' for the general translations)
 * @param string $lang 2 or 5 char lang-code
 * @return array message_id => translation pairs
 */
static function &load_app_files($app, $lang)
{
	$load_app = isset(self::$load_via[$app]) ? self::$load_via[$app] : $app;
	$loaded = array();
	foreach($load_app == 'all-apps' ? scandir(EGW_SERVER_ROOT) : (array)$load_app as $app_dir)
	{
		if (!is_dir(EGW_SERVER_ROOT.'/'.$app_dir) ||
			!@file_exists($file=self::get_lang_file($app_dir, $lang)) ||
			!($f = fopen($file, 'r')))
		{
			continue;
		}
		// store ctime of file we parse
		egw_cache::setTree(__CLASS__, $file, filectime($file));

		$line_nr = 0;
		// length 0 = no limit: a fixed limit splits longer lines, which then fail the column check
		while(($line = fgetcsv($f, 0, "\t")))
		{
			++$line_nr;
			if (count($line) != 4) continue;
			list($l_id, $l_app, $l_lang, $l_translation) = $line;
			if ($l_lang != $lang) continue;
			if ($l_app != $app)
			{
				// check if $l_app contained in file in $app_dir is mentioned in $load_via
				if ($l_app != $app_dir && (!isset(self::$load_via[$l_app]) ||
					!array_intersect((array)self::$load_via[$l_app], array('all-apps', $app_dir))))
				{
					if (!in_array($l_app, array('common', 'login')) && !file_exists(EGW_SERVER_ROOT.'/'.$l_app))
					{
						error_log(__METHOD__."() lang file $file contains invalid app '$l_app' on line $line_nr --> ignored");
						continue;
					}
					// if not, update load_via accordingly and store it as config
					if (!isset(self::$load_via[$l_app])) self::$load_via[$l_app] = array($l_app);
					if (!is_array(self::$load_via[$l_app])) self::$load_via[$l_app] = array(self::$load_via[$l_app]);
					self::$load_via[$l_app][] = $app_dir;
					config::save_value('translation_load_via', self::$load_via, 'phpgwapi');
				}
				continue;
			}
			$loaded[$l_id] = $l_translation;
		}
		fclose($f);
	}
	return $loaded;
}
2005-11-05 09:51:06 +01:00
2008-03-15 16:30:15 +01:00
/**
* Cached languages
*
* @ var array
*/
2009-04-20 13:59:39 +02:00
static $langs ;
2008-03-15 16:30:15 +01:00
/**
 * Returns a list of available languages / translations
 *
 * The list is read from the tab separated file setup/lang/languages (lang-code, name)
 * and kept in self::$langs for further calls.
 *
 * Note: the kept list is shared by all callers, $translate only has an effect
 * when the file is actually (re-)read.
 *
 * @param boolean $translate=true translate language-names
 * @param boolean $force_read=false force a re-read of the languages
 * @return array with lang-code => descriptive lang-name pairs, sorted by name
 * @throws egw_exception if the list of languages can not be opened
 */
static function get_available_langs($translate=true, $force_read=false)
{
	if (!is_array(self::$langs) || $force_read)
	{
		if (!($f = fopen($file=EGW_SERVER_ROOT.'/setup/lang/languages', 'rb')))
		{
			// NOTE(review): the file name used to be passed as 2nd constructor argument for a '%1'
			// placeholder; egw_exception is not visible here, so the name is put into the message instead
			throw new egw_exception("List of available languages ($file) missing!");
		}
		// start from an empty list, so a re-read drops stale entries and an empty file still gives an array
		$langs = array();
		while(($line = fgetcsv($f, 0, "\t")))
		{
			// skip blank or incomplete lines
			if (count($line) < 2 || (string)$line[0] === '') continue;

			$langs[$line[0]] = $line[1];
		}
		fclose($f);
		self::$langs = $langs;

		if ($translate)
		{
			if (is_null(self::$db)) self::init(false);

			foreach(self::$langs as $lang => $name)
			{
				self::$langs[$lang] = self::translate($name, False, '');
			}
		}
		uasort(self::$langs, 'strcasecmp');
	}
	return self::$langs;
}
2005-11-05 09:51:06 +01:00
2009-04-23 11:47:22 +02:00
/**
 * Returns a list of installed languages / translations
 *
 * Translations no longer need to be installed, therefore all available translations are returned here.
 *
 * The language names are requested untranslated, as this method always did;
 * $force_read is now passed on instead of being ignored.
 *
 * @param boolean $force_read=false force a re-read of the languages
 * @return array with lang-code => descriptive lang-name pairs
 */
static function get_installed_langs($force_read=false)
{
	return self::get_available_langs(false, $force_read);
}
2008-03-15 16:30:15 +01:00
/**
 * Translate a 2 or 5 char lang-code into a (verbose) language name
 *
 * Uses the already read list of languages if it contains $lang, otherwise the languages table.
 *
 * @param string $lang
 * @return string|boolean language or false if not found
 */
static function lang2language($lang)
{
	// no need to query the DB for a language we already know
	return isset(self::$langs[$lang]) ? self::$langs[$lang] :
		self::$db->select(self::LANGUAGES_TABLE, 'lang_name', array('lang_id' => $lang), __LINE__, __FILE__)->fetchColumn();
}
2005-11-05 09:51:06 +01:00
2008-03-15 16:30:15 +01:00
/**
 * List all languages, first the available ones, then the rest from the languages table
 *
 * Without $force_read the list is fetched through the instance cache, which calls this
 * method again with $force_read=true to build it.
 *
 * @param boolean $force_read=false
 * @return array with lang_id => lang_name pairs
 */
static function list_langs($force_read=false)
{
	if (!$force_read)
	{
		return egw_cache::getInstance(__CLASS__, 'list_langs', array(__CLASS__, 'list_langs'), array(true));
	}
	$languages = self::get_installed_langs();	// available languages

	// quoted list of the available lang-codes, used for the sort column and the filter
	$availible = "('".implode("','", array_keys($languages))."')";

	$columns = array(
		'lang_id', 'lang_name',
		"CASE WHEN lang_id IN $availible THEN 1 ELSE 0 END AS availible",
	);
	// append every language of the table, which is not already in the list
	$rows = self::$db->select(self::LANGUAGES_TABLE, $columns, "lang_id NOT IN $availible",
		__LINE__, __FILE__, false, ' ORDER BY availible DESC,lang_name');

	foreach($rows as $row)
	{
		$languages[$row['lang_id']] = $row['lang_name'];
	}
	return $languages;
}
2005-11-05 09:51:06 +01:00
2012-11-05 13:18:22 +01:00
/**
 * Central place to build the path of a lang-file
 *
 * @param string $app application name
 * @param string $lang language code
 * @return string full path of the lang-file for the requested app and language
 */
static function get_lang_file($app, $lang)
{
	$filename = self::LANGFILE_PREFIX.$lang.self::LANGFILE_EXTENSION;

	return implode('/', array(EGW_SERVER_ROOT, $app, self::LANG_DIR, $filename));
}
2008-03-15 16:30:15 +01:00
/**
 * Returns the list of charsets offered for selection
 *
 * The list is built once per request and then returned from a static variable.
 *
 * @return array with charset as key and a translated label followed by the charset in brackets as value
 */
static function get_installed_charsets()
{
	static $charsets;

	if (isset($charsets))
	{
		return $charsets;
	}
	// charset => phrase describing it
	$phrases = array(
		'utf-8'        => 'Unicode',
		'iso-8859-1'   => 'Western european',
		'iso-8859-2'   => 'Eastern european',
		'iso-8859-7'   => 'Greek',
		'euc-jp'       => 'Japanese',
		'euc-kr'       => 'Korean',
		'koi8-r'       => 'Russian',
		'windows-1251' => 'Bulgarian',
		'cp850'        => 'DOS International',
	);
	// charsets shown with a different spelling than their key
	$shown_as = array('cp850' => 'CP850');

	$charsets = array();
	foreach($phrases as $charset => $phrase)
	{
		$shown = isset($shown_as[$charset]) ? $shown_as[$charset] : $charset;
		$charsets[$charset] = lang($phrase).' ('.$shown.')';
	}
	return $charsets;
}
2005-11-05 09:51:06 +01:00
2008-03-15 16:30:15 +01:00
/**
 * Convert a string (or array of strings) $data from charset $from to charset $to
 *
 * Charset names are lower-cased and some aliases are mapped to names the conversion
 * functions know. Conversion is tried with utf8_encode/-decode for iso-8859-1 <--> utf-8,
 * then mbstring (unless iconv is preferred for the source charset) and finally iconv.
 * If nothing can convert, $data is returned unchanged.
 *
 * @param string|array $data string(s) to convert
 * @param string|boolean $from=False charset $data is in, or False if it should be detected
 *	(for an array the charset is detected for each element)
 * @param string|boolean $to=False charset to convert to, or False for the system-charset
 * @param boolean $check_to_from=true internal, false bypasses all charset detection and replacements
 * @return string|array converted string(s) from $data
 */
static function convert($data, $from=False, $to=False, $check_to_from=true)
{
	if ($check_to_from)
	{
		if (!$data)
		{
			return $data;	// nothing to convert
		}
		if ($from) $from = strtolower($from);

		if ($to) $to = strtolower($to);

		if (!$from)
		{
			if (is_array($data))
			{
				// mb_detect_encoding() works on strings only: detect the charset per element
				$ret = array();
				foreach($data as $key => $str)
				{
					$ret[$key] = self::convert($str, false, $to);
				}
				return $ret;
			}
			$from = self::$mbstring ? strtolower(mb_detect_encoding($data)) : 'iso-8859-1';
			if ($from == 'ascii')
			{
				$from = 'iso-8859-1';
			}
		}
		/*
			 php does not seem to support gb2312
			 but seems to be able to decode it as EUC-CN
		*/
		switch($from)
		{
			case 'gb2312':
			case 'gb18030':
				// lower case, so the euc-cn workaround for iconv below recognises it
				$from = 'euc-cn';
				break;
			case 'windows-1252':
			case 'mswin1252':
				if (function_exists('iconv'))
				{
					break;	// keep the name, iconv is preferred for it (see below)
				}
				// fall through to remap to iso-8859-1
			case 'us-ascii':
			case 'macroman':
			case 'iso8859-1':
			case 'windows-1258':
				$from = 'iso-8859-1';
				break;
			case 'windows-1250':
				$from = 'iso-8859-2';
				break;
			case 'windows-1257':
				$from = 'iso-8859-13';
				break;
		}
		if (!$to)
		{
			$to = self::charset();
		}
		if ($from == $to || !$from || !$to)
		{
			return $data;
		}
	}
	// decided from $from itself (not inside the checks above), so it also holds for the
	// elements of an array, which are converted with $check_to_from=false
	$prefer_iconv = in_array($from, array('windows-874', 'tis-620', 'windows-1256')) ||
		in_array($from, array('windows-1252', 'mswin1252')) && function_exists('iconv');

	if (is_array($data))
	{
		$ret = array();
		foreach($data as $key => $str)
		{
			$ret[$key] = self::convert($str, $from, $to, false);	// false = bypass the above checks, as they are already done
		}
		return $ret;
	}
	if ($from == 'iso-8859-1' && $to == 'utf-8')
	{
		return utf8_encode($data);
	}
	if ($to == 'iso-8859-1' && $from == 'utf-8')
	{
		return utf8_decode($data);
	}
	if (self::$mbstring && !$prefer_iconv)
	{
		// use a separate variable: a failed conversion must not destroy $data for the iconv fallback
		$converted = @mb_convert_encoding($data, $to, $from);
		if ($converted != '')
		{
			return $converted;
		}
	}
	if (function_exists('iconv'))
	{
		// iconv can not convert from/to utf7-imap
		if ($to == 'utf7-imap' && function_exists('imap_utf7_encode'))
		{
			$convertedData = iconv($from, 'iso-8859-1', $data);
			$convertedData = imap_utf7_encode($convertedData);

			return $convertedData;
		}

		if ($from == 'utf7-imap' && function_exists('imap_utf7_decode'))
		{
			$convertedData = imap_utf7_decode($data);
			$convertedData = iconv('iso-8859-1', $to, $convertedData);

			return $convertedData;
		}

		// the following is to workaround patch #962307
		// if using EUC-CN, iconv strictly follows GB2312 and fails
		// in an email on the first Traditional/Japanese/Korean character,
		// but in reality when people send mails in GB2312, UMA mostly use
		// extended GB13000/GB18030 which allow T/Jap/Korean characters.
		if ($from == 'euc-cn')
		{
			$from = 'gb18030';
		}

		if (($convertedData = iconv($from, $to, $data)))
		{
			return $convertedData;
		}
	}
	return $data;
}
2007-02-14 12:44:01 +01:00
2008-03-15 16:30:15 +01:00
/**
 * Insert, update or delete one phrase in the lang table and invalidate the cache
 *
 * For an instance specific application the cached phrases of all languages are unset,
 * otherwise only the tree cache entry of $app and $lang.
 *
 * @param string $lang
 * @param string $app
 * @param string $message_id
 * @param string $content translation, or an empty value (eg. null) to delete the translation
 */
static function write($lang, $app, $message_id, $content)
{
	$where = array(
		'lang'       => $lang,
		'app_name'   => $app,
		'message_id' => $message_id,
	);
	if ($content)
	{
		self::$db->insert(self::LANG_TABLE, array('content' => $content), $where, __LINE__, __FILE__);
	}
	else
	{
		self::$db->delete(self::LANG_TABLE, $where, __LINE__, __FILE__);
	}

	// invalidate the cache
	if (!in_array($app, self::$instance_specific_translations))
	{
		egw_cache::unsetCache(egw_cache::TREE, __CLASS__, $app.':'.$lang);
		return;
	}
	foreach(array_keys((array)self::get_installed_langs()) as $installed)
	{
		egw_cache::unsetCache(egw_cache::INSTANCE, __CLASS__, $app.':'.$installed);
	}
}
2008-03-15 16:30:15 +01:00
/**
 * Fetch the stored translation of a single phrase from the lang-table
 *
 * @param string $lang language of the phrase
 * @param string $app_name application the phrase belongs to
 * @param string $message_id
 * @return string/boolean content or false if not found
 */
static function read($lang,$app_name,$message_id)
{
	$where = array(
		'lang'       => $lang,
		'app_name'   => $app_name,
		'message_id' => $message_id,
	);
	$rs = self::$db->select(self::LANG_TABLE,'content',$where,__LINE__,__FILE__);

	// only the first column of the first matching row is of interest
	return $rs->fetchColumn();
}
2009-04-20 13:59:39 +02:00
2008-03-15 16:30:15 +01:00
/**
 * Look up the message_id belonging to a given translation
 *
 * The translation is compared with the database's case-insensitive LIKE operator.
 *
 * @param string $translation translated text to search for
 * @param string $app =null restrict the search to this app, default check all apps
 * @param string $lang =null restrict the search to this language, default check all langs
 * @return string first column of the first matching row, as returned by fetchColumn()
 */
static function get_message_id($translation,$app=null,$lang=null)
{
	$like = self::$db->capabilities[egw_db::CAPABILITY_CASE_INSENSITIV_LIKE];

	$where = array('content '.$like.' '.self::$db->quote($translation));
	if ($app)
	{
		$where['app_name'] = $app;
	}
	if ($lang)
	{
		$where['lang'] = $lang;
	}
	$rs = self::$db->select(self::LANG_TABLE,'message_id',$where,__LINE__,__FILE__);

	return $rs->fetchColumn();
}
2009-04-01 17:20:32 +02:00
2012-11-05 13:18:22 +01:00
/**
 * detect_encoding - try to detect the encoding
 *
 * Only to be used if the string in question has no structure that determines its encoding.
 *
 * With iconv available, the first charset of a fixed candidate list in which the string is valid wins.
 * Otherwise mb_detect_encoding() is asked (if self::$mbstring is set), with iso-8859-1 as last resort.
 *
 * @param string $string to be evaluated
 * @return string lowercase name of the encoding
 */
static function detect_encoding($string)
{
	static $list = array('utf-8', 'iso-8859-1', 'windows-1251'); // list may be extended

	$string = (string)$string;

	if (function_exists('iconv'))
	{
		foreach ($list as $item)
		{
			// a string is valid in a charset, if converting it to that very charset leaves it unchanged.
			// iconv() returns false and raises a notice on invalid input, which is expected while probing.
			// Bytes are compared directly: a loose == on md5 hex strings can report equality for
			// different hashes that both look like numbers ("0e123..." == "0e456...").
			if ((string)@iconv($item, $item, $string) === $string)
			{
				return $item;
			}
		}
	}
	if (self::$mbstring)
	{
		// mb_detect_encoding() returns false if it can not decide, do not turn that into an empty charset name
		$detected = mb_detect_encoding($string);
		if ($detected) return strtolower($detected);
	}
	return 'iso-8859-1'; // we choose to return iso-8859-1 as default
}
2009-04-01 17:20:32 +02:00
/**
 * Return the decoded string meeting some additional requirements for mailheaders
 *
 * Uses the first available decoder out of imap_mime_header_decode(), mb_decode_mimeheader() and
 * iconv_mime_decode(). In every case control characters and "=" are removed from the result.
 *
 * @param string $_string part of a mailheader
 * @param string $displayCharset ='utf-8' charset to represent the result in: passed to iconv_mime_decode()
 *	and used as charset of nested encoded-words which got decoded recursively
 * @return string
 */
static function decodeMailHeader($_string, $displayCharset='utf-8')
{
	// NUL-LF, CR, SO, 0x10-0x1F and "=" (octal 075) are stripped from every result
	$unwanted = '/([\000-\012\015\016\020-\037\075])/';

	// function names have to be given as strings: a bare word is an undefined constant (Error since PHP 8)
	if (function_exists('imap_mime_header_decode'))
	{
		// some characterreplacements, as they fail to translate
		// (the byte values are the Windows-1252 typographic quotes, dashes, ellipsis and dagger)
		$sar = array(
			'@(\x84|\x93|\x94)@',
			'@(\x96|\x97|\x1a)@',
			'@(\x91|\x92)@',
			'@(\x85)@',
			'@(\x86)@',
		);
		$rar = array(
			'"',
			'-',
			'\'',
			'...',
			'+',
		);
		$newString = '';
		// normalise the whitespace between two adjacent encoded-words to a single space
		$string = preg_replace('/\?=\s+=\?/', '?= =?', $_string);
		$elements = imap_mime_header_decode($string);
		// false is returned on failure, treat that like "nothing decoded"
		if (!is_array($elements)) $elements = array();

		$convertAtEnd = false;
		foreach ($elements as $element)
		{
			if ($element->charset == 'default') $element->charset = self::detect_encoding($element->text);

			if ($element->charset != 'x-unknown')
			{
				if (strtoupper($element->charset) != 'UTF-8') $element->text = preg_replace($sar,$rar,$element->text);
				// element still contains encoded-words --> decode them recursively
				if (preg_match('/\?=.+=\?/',$element->text))
				{
					$element->text = self::decodeMailHeader($element->text, $element->charset);
					$element->charset = $displayCharset;
				}
				$newString .= self::convert($element->text,$element->charset);
			}
			else
			{
				// unknown charset: take the text as is and run the assembled string through the decoder once more
				$newString .= $element->text;
				$convertAtEnd = true;
			}
		}
		if ($convertAtEnd) $newString = self::decodeMailHeader($newString,$displayCharset);

		return preg_replace($unwanted,'',$newString);
	}
	elseif (function_exists('mb_decode_mimeheader'))
	{
		$string = $_string;
		if (preg_match_all('/=\?.*\?Q\?.*\?=/iU',$string,$matches))
		{
			// in Q-encoded words "_" stands for a space
			foreach ($matches[0] as $match)
			{
				$fixedMatch = str_replace('_', ' ', $match);
				$string = str_replace($match, $fixedMatch, $string);
			}
			$string = str_replace('=?ISO8859-','=?ISO-8859-',$string);
			$string = str_replace('=?windows-1258','=?ISO-8859-1',$string);
		}
		$string = mb_decode_mimeheader($string);

		return preg_replace($unwanted,'',$string);
	}
	elseif (function_exists('iconv_mime_decode'))
	{
		// continue decoding also if an error occurs
		$string = @iconv_mime_decode($_string, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $displayCharset);

		return preg_replace($unwanted,'',$string);
	}
	// no decoding function available
	return preg_replace($unwanted,'',$_string);
}
2009-09-04 15:40:10 +02:00
/**
 * Replace emailaddresses enclosed in <> (eg.: <me@you.de>) with the emailaddress only (e.g: me@you.de),
 * as well as those emailaddresses in links, and within broken links
 *
 * Every replaced address is followed by a single space.
 *
 * @param string &$text the text to process, it is modified in place
 * @return int always 1
 */
static function replaceEmailAdresses(&$text)
{
	// The patterns used to carry the /e (eval) modifier with a replacement like "'$2 '". That modifier
	// was removed in PHP 7, where preg_replace() then returns NULL and the whole text is lost. The
	// matched groups can only contain address characters, so a plain backreference gives the same result.
	$replacements = array(
		// "mailto:" addresses, optionally preceded by "<" or an entity-encoded link start
		'/(<|&lt;a href=")*(mailto:([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))(>|&gt;)*/i' => '$2 ',
		// opening tag of a mailto link --> address only
		'~<a[^>]+href=\"(mailto:)+([^"]+)\"[^>]*>~si' => '$2 ',
		// address followed by left overs of a (broken) link: whitespace, </a>, closing bracket
		'/(([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))(&nbsp;|\s)*(<\/a>)*(&nbsp;|\s)*(>|&gt;)*/i' => '$1 ',
		// address enclosed in <> or their entities
		'/(<|&lt;)*(([\w\.,-.,_.,0-9.]+)@([\w\.,-.,_.,0-9.]+))(>|&gt;)*/i' => '$2 ',
	);
	//replace CRLF with something other to be preserved via preg_replace as CRLF seems to vanish
	$text = str_replace("\r\n",'<#cr-lf#>',$text);

	foreach ($replacements as $pattern => $replacement)
	{
		$replaced = preg_replace($pattern,$replacement,$text);
		// NULL signals a PCRE error (eg. backtrack limit): keep what we have instead of loosing the text
		if ($replaced !== null) $text = $replaced;
	}
	$text = str_replace('<#cr-lf#>',"\r\n",$text);

	return 1;
}
2009-11-18 11:38:15 +01:00
/**
 * Strip tags out of the message completely with their content
 *
 * $tag and $endtag are inserted unescaped into regular expressions, so characters with a special
 * meaning in PCRE have to be passed already escaped (eg. '!\[if').
 *
 * @param string &$_body the text to be processed, it is modified in place
 * @param string $tag name of the tag to remove. Note, that only the name of the tag is to be passed,
 *	without the enclosing brackets
 * @param string $endtag ='' can be different from tag, but should be used only, if begin and endtag are known
 *	to be different e.g.: <!-- -->; '/>' removes self-closing tags (<tag ... />); default is $tag
 * @param bool $addbracesforendtag =true if endtag is given, you may decide if the </ and > braces are to be
 *	added (true), or if you want the string to be matched as is (false); any other value removes nothing
 * @return void the modified text is passed via reference
 */
static function replaceTagsCompletley(&$_body,$tag,$endtag='',$addbracesforendtag=true)
{
	if (!$_body) return;	// nothing to strip

	if ($tag) $tag = strtolower($tag);
	$singleton = $endtag == '/>';
	$endtag = empty($endtag) ? $tag : strtolower($endtag);

	if ($singleton)
	{
		// no U modifier here: it inverts greediness, so the lazy ".*?" would become greedy and
		// remove everything from the first matching tag up to the LAST "/>" of the body
		$patterns = array('~<'.$tag.'[^>].*? '.$endtag.'~sim');
	}
	elseif ($addbracesforendtag === true)
	{
		$patterns = array(
			'~<'.$tag.'[^>]*?>(.*)</'.$endtag.'[\s]*>~simU',
			// remove left over tags, unfinished ones, and so on
			'~<'.$tag.'[^>]*?>~si',
		);
	}
	elseif ($addbracesforendtag === false)
	{
		$patterns = array(
			'~<'.$tag.'[^>]*?>(.*)'.$endtag.'~simU',
			// remove left over tags, unfinished ones, and so on
			'~<'.$tag.'[^>]*?>~si',
			// left over end tags, matched case-insensitive like all other patterns ($endtag got lowercased)
			'~'.$endtag.'~i',
		);
	}
	else
	{
		$patterns = array();
	}
	foreach ($patterns as $pattern)
	{
		$stripped = preg_replace($pattern,'',$_body);
		// NULL signals a PCRE error (eg. backtrack limit on huge bodies): keep the text instead of loosing it
		if ($stripped !== null) $_body = $stripped;
	}
}
2009-11-18 11:38:15 +01:00
/**
 * convertHTMLToText - convert a piece of HTML into plain text
 *
 * Style, head and conditional-comment sections and scripts are removed, common entities replaced,
 * block level tags turned into CRLF, links rewritten as "[description -> URL]" and blockquotes
 * rendered as ">" indented lines, which get wrapped at 76 characters.
 *
 * @param string $_html Text to be stripped down
 * @param string $displayCharset =false charset to use for html_entity_decode(); should be a valid charset,
 *	false uses self::$system_charset
 * @param bool $stripcrl =false flag to indicate for the removal of all crlf \r\n of the input and the
 *	reduction of multiple empty lines of the result (content of <pre> sections is kept)
 * @param bool $stripalltags =true flag to indicate whether or not to strip $_html from all remaining tags
 * @return string the modified text
 */
static function convertHTMLToText($_html,$displayCharset=false,$stripcrl=false,$stripalltags=true)
{
	if ($displayCharset === false) $displayCharset = self::$system_charset;

	if (stripos($_html,'style')!==false) self::replaceTagsCompletley($_html,'style'); // clean out empty or pagewide style definitions / left over tags
	if (stripos($_html,'head')!==false) self::replaceTagsCompletley($_html,'head'); // Strip out stuff in head
	if (stripos($_html,'![if')!==false && stripos($_html,'<![endif]>')!==false) self::replaceTagsCompletley($_html,'!\[if','<!\[endif\]>',false); // Strip out stuff in ifs
	if (stripos($_html,'!--[if')!==false && stripos($_html,'<![endif]-->')!==false) self::replaceTagsCompletley($_html,'!--\[if','<!\[endif\]-->',false); // Strip out stuff in ifs

	// pattern => replacement pairs, applied in this order
	$entityRules = array(
		// Strip out javascript. No U modifier: it would make ".*?" greedy and remove
		// everything between the first <script> and the LAST </script>
		'@<script[^>]*?>.*?</script>@si' => '',
		'@&(quot|#34);@i'     => '"',				// Replace HTML entities
		'@&(amp|#38);@i'      => '#amper#sand#',	// Ampersand &, placeholder gets restored further down
		'@&(lt|#60);@i'       => '<',				// Less Than <
		'@&(gt|#62);@i'       => '>',				// Greater Than >
		'@&(nbsp|#160);@i'    => ' ',				// Non Breaking Space
		// NOTE(review): chr(161)-chr(163) are single bytes, which looks like it assumes a
		// single-byte display charset and is not valid UTF-8 - confirm
		'@&(iexcl|#161);@i'   => chr(161),			// Inverted Exclamation point
		'@&(cent|#162);@i'    => chr(162),			// Cent
		'@&(pound|#163);@i'   => chr(163),			// Pound
		'@&(copy|#169);@i'    => '(C)',				// Copyright
		'@&(reg|#174);@i'     => '(R)',				// Registered
		'@&(trade|#8482);@i'  => '(TM)',			// trade
		'@&#39;@i'            => "'",				// singleQuote
		'@(\xc2\xa0)@'        => ' ',				// nbsp or tab (encoded windows-style)
	);
	$_html = preg_replace(array_keys($entityRules),array_values($entityRules),$_html);

	// removing carriage return linefeeds, preserve those enclosed in <pre> </pre> tags
	if ($stripcrl === true)
	{
		if (stripos($_html,'<pre>')!==false)
		{
			// presumably returns the html split into chunks with <pre> sections as own chunks - verify against html class
			$contentArr = html::splithtmlByPRE($_html);
			foreach ($contentArr as $k => &$elem)
			{
				if (stripos($elem,'<pre>')===false)
				{
					$elem = str_replace(array("\r\n","\n"),' ',$elem);
				}
			}
			unset($elem);	// break the reference, so later use of $elem can not alter the array
			$_html = implode('',$contentArr);
		}
		else
		{
			$_html = str_replace(array("\r\n","\n"),' ',$_html);
		}
	}
	// block level tags become line breaks, blockquotes become markers evaluated at the end
	$tagRules = array(
		'~<h[123][^>]*>\r*\n*~si'     => "\r\n",
		'~<h[456][^>]*>\r*\n*~si'     => "\r\n",
		'~<table[^>]*>\r*\n*~si'      => "\r\n",
		'~<tr[^>]*>\r*\n*~si'         => "\r\n",
		'~<li[^>]*>\r*\n*~si'         => "\r\n",
		'~<br[^>]*>\r*\n*~si'         => "\r\n",
		'~<br[^>]*>~si'               => "\r\n",
		'~<p[^>]*>\r*\n*~si'          => "\r\n",
		'~<div[^>]*>\r*\n*~si'        => "\r\n",
		'~<hr[^>]*>\r*\n*~si'         => "\r\n__________________________________________________\r\n",
		'/<blockquote type="cite">/'  => '#blockquote#type#cite#',
		'/<blockquote>/'              => '#blockquote#type#cite#',
		'~</blockquote>~si'           => '#blockquote#end#cite#',
		'~<blockquote[^>]*>~si'       => '#blockquote#type#cite#',
	);
	$_html = preg_replace(array_keys($tagRules),array_values($tagRules),$_html);
	$_html = preg_replace('~</t(d|h)>\s*<t(d|h)[^>]*>~si',' - ',$_html);
	$_html = preg_replace('~<img[^>]+>~s','',$_html);
	// replace emailaddresses eclosed in <> (eg.: <me@you.de>) with the emailaddress only (e.g: me@you.de)
	self::replaceEmailAdresses($_html);
	//convert hrefs to description -> URL; the description is matched lazy, as a greedy match
	// merges everything between the first link and the last </a> into one single link
	$_html = preg_replace('~<a[^>]+href=\"([^"]+)\"[^>]*>(.*?)</a>~si','[$2 -> $1]',$_html);

	// matches every remaining tag, unless it contains an @ (those are most likely addresses in <>)
	$anyTag = '~<[^>^@]+>~s';

	// reducing double \r\n to single ones, dont mess with pre sections
	if ($stripcrl === true)
	{
		if (stripos($_html,'<pre>')!==false)
		{
			$contentArr = html::splithtmlByPRE($_html);
			foreach ($contentArr as $k => &$elem)
			{
				if (stripos($elem,'<pre>')===false)
				{
					//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
					// (has to work on $elem: a result stored in $_html is overwritten by the implode below)
					if ($stripalltags) $elem = preg_replace($anyTag,'',$elem);
					// strip out whitespace inbetween CR/LF
					$elem = preg_replace('~\r\n\s+\r\n~si', "\r\n\r\n", $elem);
					// strip out / reduce exess CR/LF (the group is required, to repeat CRLF and not only LF)
					$elem = preg_replace('~(\r\n){3,}~si',"\r\n\r\n",$elem);
				}
			}
			unset($elem);
			$_html = implode('',$contentArr);
		}
		else
		{
			//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
			if ($stripalltags) $_html = preg_replace($anyTag,'',$_html);
			// strip out whitespace inbetween CR/LF
			$_html = preg_replace('~\r\n\s+\r\n~si', "\r\n\r\n", $_html);
			// strip out / reduce exess CR/LF
			$_html = preg_replace('~(\r\n){3,}~si',"\r\n\r\n",$_html);
		}
	}
	//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content
	if ($stripalltags) $_html = preg_replace($anyTag,'',$_html);

	// reducing spaces
	$_html = preg_replace('~ +~s',' ',$_html);
	// we dont reduce whitespace at the start or the end of the line, since its used for structuring the document
	// restoring ampersands
	$_html = str_replace('#amper#sand#','&',$_html);
	$_html = html_entity_decode($_html, ENT_COMPAT, $displayCharset);

	if (strpos($_html, 'blockquote') === false)
	{
		return $_html;
	}
	// render the blockquote markers as ">" indention, one ">" per nesting level
	$indent = 0;
	$indentString = '';
	$asciiTextBuff = array();

	$quoteParts = preg_split('/#blockquote#type#cite#/', $_html, -1, PREG_SPLIT_OFFSET_CAPTURE);
	foreach ($quoteParts as $quotePart)
	{
		// every part but the first one (offset 0) follows a start marker
		if ($quotePart[1] > 0)
		{
			$indent++;
			$indentString .= '>';
		}
		$quoteParts2 = preg_split('/#blockquote#end#cite#/', $quotePart[0], -1, PREG_SPLIT_OFFSET_CAPTURE);
		foreach ($quoteParts2 as $quotePart2)
		{
			// every part but the first one follows an end marker
			if ($quotePart2[1] > 0)
			{
				$indent--;
				$indentString = substr($indentString, 0, $indent);
			}
			$allowedLength = 76-strlen("\r\n$indentString");

			foreach (explode("\r\n", $quotePart2[0]) as $quotePart3)
			{
				// only break lines, if not already indented
				if (substr($quotePart3,0,strlen($indentString)) != $indentString &&
					strlen($quotePart3) > $allowedLength)
				{
					$words = explode(' ', $quotePart3);
					$quotePart3 = '';
					$linecnt = 0;
					foreach ($words as $v)
					{
						$cnt = strlen($v);
						// only break long words within the wordboundaries,
						// but it may destroy links, so we check for href and dont do it if we find it
						if ($cnt > $allowedLength && stripos($v,'href=')===false)
						{
							$v = wordwrap($v, $allowedLength, "\r\n$indentString", true);
						}
						// the rest should be broken at the start of the new word that exceeds the limit
						if ($linecnt+$cnt > $allowedLength)
						{
							$v = "\r\n$indentString$v";
							$linecnt = 0;
						}
						else
						{
							$linecnt += $cnt;
						}
						if (strlen($v)) $quotePart3 .= (strlen($quotePart3) ? ' ' : '').$v;
					}
				}
				$asciiTextBuff[] = $indentString . $quotePart3;
			}
		}
	}
	return implode("\r\n", $asciiTextBuff);
}
2008-03-15 16:30:15 +01:00
}