egroupware/addressbook/csv_import.php
Ralf Becker 646931dc8a fix for bug #1838: problem with utf-8 data in all csv imports
- fgetcsv only works correctly if setlocale is called with an existing and
  correct locale
- improved the projectmanager method guess_local and moved it to
  common::setlocale, which now takes the charset, lang and country of the
  user into account
- csv-import now also displays the conversion done and reads usernames
  in brackets
- added some missing fields
2009-04-03 12:26:32 +00:00

446 lines
18 KiB
PHP

<?php
/**
* Addressbook - CSV Import
*
* @link http://www.egroupware.org
* @author Ralf Becker <RalfBecker-AT-outdoor-training.de>
* @package addressbook
* @copyright (c) 2003-9 by Ralf Becker <RalfBecker-AT-outdoor-training.de>
* @license http://opensource.org/licenses/gpl-license.php GPL - GNU General Public License
* @version $Id$
*/
// Bootstrap: declare the current application and set the 'noheader' flag before the
// framework is loaded; the page header is emitted explicitly further down via egw_header(),
// after the cancel-redirect had its chance.
$GLOBALS['egw_info'] = array(
	'flags' => array(
		'currentapp' => 'addressbook',
		'noheader'   => True,
	),
);
include('../header.inc.php');

// 'action' is compared and switched on below: make sure the key exists, so the very first
// request (no form submitted yet) does not read an undefined index
if (!isset($_POST['action']))
{
	$_POST['action'] = '';
}

if (isset($_FILES['csvfile']['tmp_name']))
{
	// a file got uploaded: move it into a temp. file of our own and remember its name in the
	// session, as the import runs over several requests
	$csvfile = tempnam($GLOBALS['egw_info']['server']['temp_dir'],$GLOBALS['egw_info']['flags']['currentapp']."_");
	$GLOBALS['egw']->session->appsession('csvfile','',$csvfile);
	// a failed upload sends the user back to the start page (action '')
	$_POST['action'] = move_uploaded_file($_FILES['csvfile']['tmp_name'],$csvfile) ?
		'download' : '';
}
else
{
	// no upload in this request: continue with the file remembered in the session
	$csvfile = $GLOBALS['egw']->session->appsession('csvfile');
}

if (!empty($_POST['cancel']))
{
	// import canceled: remove the uploaded file (if there is one) and go back to the addressbook
	if ($csvfile)
	{
		@unlink($csvfile);
	}
	$GLOBALS['egw']->redirect_link('/addressbook/index.php');
}

if (isset($_POST['charset']))
{
	// we have to set the locale, to fix eg. utf-8 imports, as fgetcsv requires it!
	common::setlocale(LC_CTYPE,$_POST['charset']);
}

$GLOBALS['egw_info']['flags']['app_header'] = lang('Import CSV-File into Addressbook');
$GLOBALS['egw']->common->egw_header();

$GLOBALS['egw']->contacts = new contacts();

// load the template and register its blocks, one per step of the import
//$GLOBALS['egw']->template->set_unknowns('keep');
$GLOBALS['egw']->template->set_file(array('import' => 'csv_import.tpl'));
$GLOBALS['egw']->template->set_block('import','filename','filenamehandle');
$GLOBALS['egw']->template->set_block('import','fheader','fheaderhandle');
$GLOBALS['egw']->template->set_block('import','fields','fieldshandle');
$GLOBALS['egw']->template->set_block('import','ffooter','ffooterhandle');
$GLOBALS['egw']->template->set_block('import','imported','importedhandle');

// the field-assignment form needs a field separator and a readable csv file (opened here as
// $fp for the 'download' / 'continue' step), otherwise fall back to the start page
if(($_POST['action'] == 'download' || $_POST['action'] == 'continue') &&
	(empty($_POST['fieldsep']) || !$csvfile || !($fp=fopen($csvfile,'rb'))))
{
	$_POST['action'] = '';
}
$GLOBALS['egw']->template->set_var('action_url',$GLOBALS['egw']->link('/addressbook/csv_import.php'));
// Markers of the translation syntax the user can enter for every csv field
// Pattern-Separator: separates the pattern-replacement pairs of one translation
$PSep = '||';
// Assignment-Separator: separates a pattern from its replacement
$ASep = '|>';
// Value-Prefix: stands for a backslash in the replacement handed to ereg_replace
$VPre = '|#';
// |[csv-fieldname] is expanded to the value of that csv field; if the translation gets
// eval-ed (it starts with @) the expanded value is quoted automatically
$CPre = '|[';
$CPreReg = '\|\[';	// $CPre escaped for use in a regular expression
$CPos = ']';
$CPosReg = '\]';	// $CPos escaped for use in a regular expression
/**
 * Find a contact in the addressbook
 *
 * At least n_family AND (n_given OR org_name) have to match: the searches are tried from the
 * most specific to the least specific one and the first one with a result wins.
 *
 * @param string $n_family family name
 * @param string $n_given given name
 * @param string $org_name name of the organisation
 * @return mixed id of the first contact found or False if no contact matches
 */
function addr_id($n_family,$n_given,$org_name)
{
	$searches = array(
		array('n_family'=>$n_family,'n_given'=>$n_given,'org_name'=>$org_name),
		array('n_family'=>$n_family,'n_given'=>$n_given),
		array('n_family'=>$n_family,'org_name'=>$org_name),
	);
	foreach($searches as $criteria)
	{
		$addrs = $GLOBALS['egw']->contacts->search($criteria);

		// only a non-empty array is a hit: calling count() on a non-array result (eg. null or
		// false for "nothing found") is a TypeError in PHP 8 and counted false as 1 before
		if (is_array($addrs) && $addrs)
		{
			$first = reset($addrs);

			return $first['id'];
		}
	}
	return False;
}
/**
 * Convert a list of category names and/or ids into a comma-separated list of category ids
 *
 * Numeric entries are taken as id if a category with that id exists, everything else is
 * looked up by name. A category which is not found gets created.
 *
 * @param string $cats category names or ids, separated by comma or semicolon
 * @return string comma-separated category ids, '' if no category was given
 */
function cat_id($cats)
{
	if(!$cats)
	{
		return '';
	}
	$ids = array();
	// preg_split() instead of split(), which was removed in PHP 7.0; empty entries (eg. from a
	// trailing separator) are skipped, to not create a category without a name
	foreach(preg_split('/ *[,;] */',trim($cats),-1,PREG_SPLIT_NO_EMPTY) as $cat)
	{
		// id2name() returning '--' is taken as "no category with that id"
		if (is_numeric($cat) && $GLOBALS['egw']->categories->id2name($cat) != '--')
		{
			$id = (int) $cat;
		}
		elseif ($id = $GLOBALS['egw']->categories->name2id(addslashes($cat)))
		{
			// cat exists
		}
		else
		{	// create new cat
			$id = $GLOBALS['egw']->categories->add(array('name' => $cat,'descr' => $cat));
		}
		$ids[$id] = $id;	// we use the $id as index to not add a cat twice
	}
	return implode(',',$ids);
}
// the "Import next set" button is a submit button named 'next', map it to its own action
if (!empty($_POST['next'])) $_POST['action'] = 'next';

switch(isset($_POST['action']) ? $_POST['action'] : '')
{
	case '':	// Start, ask Filename
		$GLOBALS['egw']->template->set_var('lang_csvfile',lang('CSV-Filename'));
		$GLOBALS['egw']->template->set_var('lang_fieldsep',lang('Fieldseparator'));
		$GLOBALS['egw']->template->set_var('lang_charset',lang('Charset of file'));
		$GLOBALS['egw']->template->set_var('select_charset',
			html::select('charset','',
			$GLOBALS['egw']->translation->get_installed_charsets()+
			array('utf-8' => 'utf-8 (Unicode)'),True));
		// the separator comes straight from the request and ends up in the html page (presumably
		// as value of an input - confirm against csv_import.tpl), so it has to be escaped
		$GLOBALS['egw']->template->set_var('fieldsep',
			htmlspecialchars(!empty($_POST['fieldsep']) ? $_POST['fieldsep'] : ';'));
		$GLOBALS['egw']->template->set_var('submit',lang('Import'));
		$GLOBALS['egw']->template->set_var('csvfile',$csvfile);
		$GLOBALS['egw']->template->set_var('enctype','ENCTYPE="multipart/form-data"');

		$GLOBALS['egw']->template->parse('filenamehandle','filename');
		break;
case 'continue':
case 'download':
$defaults = $GLOBALS['egw_info']['user']['preferences']['addressbook']['cvs_import'];
if(!is_array($defaults))
{
$defaults = array();
}
$GLOBALS['egw']->template->set_var('lang_csv_fieldname',lang('CSV-Fieldname'));
$GLOBALS['egw']->template->set_var('lang_addr_fieldname',lang('Addressbook-Fieldname'));
$GLOBALS['egw']->template->set_var('lang_translation',lang("Translation").' <a href="#help">'.lang('help').'</a>');
$GLOBALS['egw']->template->set_var('submit',
html::submit_button('convert','Import') . '&nbsp;'.
html::submit_button('cancel','Cancel'));
$GLOBALS['egw']->template->set_var('lang_debug',lang('Test Import (show importable records <u>only</u> in browser)'));
$GLOBALS['egw']->template->parse('fheaderhandle','fheader');
$addr_names = $GLOBALS['egw']->contacts->contact_fields;
$addr_names['cat_id'] .= ': id or name, comma separated list';
$addr_names['private'] .= ': 0 = public, 1 = private';
$addr_names['owner'] .= ': id or account name of user or group, defaults to importing user';
$addr_names['bday'] .= ': YYYY-mm-dd';
unset($addr_names['jpegphoto']); // cant cvs import that
foreach($GLOBALS['egw']->contacts->customfields as $name => $data)
{
$addr_names['#'.$name] = $data['label'];
}
$addr_name_options = "<option value=\"\">none\n";
foreach($addr_names as $field => $name)
{
$addr_name_options .= "<option value=\"$field\">".$GLOBALS['egw']->strip_html($name)."\n";
}
$csv_fields = fgetcsv($fp,8000,$_POST['fieldsep']);
$csv_fields = $GLOBALS['egw']->translation->convert($csv_fields,$_POST['charset']);
$csv_fields[] = 'no CSV 1'; // eg. for static assignments
$csv_fields[] = 'no CSV 2';
$csv_fields[] = 'no CSV 3';
foreach($csv_fields as $csv_idx => $csv_field)
{
$GLOBALS['egw']->template->set_var('csv_field',$csv_field);
$GLOBALS['egw']->template->set_var('csv_idx',$csv_idx);
if($def = $defaults[$csv_field])
{
list($addr,$_POST['trans']) = explode($PSep,$def,2);
$GLOBALS['egw']->template->set_var('trans',$_POST['trans']);
$GLOBALS['egw']->template->set_var('addr_fields',str_replace('="'.$addr.'">','="'.$addr.'" selected>',$addr_name_options));
}
else
{
$GLOBALS['egw']->template->set_var('trans','');
$GLOBALS['egw']->template->set_var('addr_fields',$addr_name_options);
}
$GLOBALS['egw']->template->parse('fieldshandle','fields',True);
}
$GLOBALS['egw']->template->set_var('lang_start',lang('Startrecord'));
$GLOBALS['egw']->template->set_var('start',get_var('start',array('POST'),1));
$msg = ($safe_mode = ini_get('safe_mode') == 'On') ? lang('to many might exceed your execution-time-limit'):
lang('empty for all');
$GLOBALS['egw']->template->set_var('lang_max',lang('Number of records to read (%1)',$msg));
$GLOBALS['egw']->template->set_var('max',get_var('max',array('POST'),$safe_mode ? 200 : ''));
$GLOBALS['egw']->template->set_var('debug',get_var('debug',array('POST'),True)?' checked':'');
$GLOBALS['egw']->template->parse('ffooterhandle','ffooter');
fclose($fp);
$hiddenvars = html::input_hidden(array(
'action' => 'import',
'fieldsep'=> $_POST['fieldsep'],
'charset' => $_POST['charset']
));
$mktime_lotus = "${PSep}0?([0-9]+)[ .:-]+0?([0-9]*)[ .:-]+0?([0-9]*)[ .:-]+0?([0-9]*)[ .:-]+0?([0-9]*)[ .:-]+0?([0-9]*).*$ASep@mktime(${VPre}4,${VPre}5,${VPre}6,${VPre}2,${VPre}3,${VPre}1)";
$help_on_trans = "<a name=\"help\"></a><b>How to use Translation's</b><p>".
"Translations enable you to change / adapt the content of each CSV field for your needs. <br>".
"General syntax is: <b>pattern1 ${ASep} replacement1 ${PSep} ... ${PSep} patternN ${ASep} replacementN</b><br>".
"If the pattern-part of a pair is ommited it will match everything ('^.*$'), which is only ".
"usefull for the last pair, as they are worked from left to right.<p>".
"First example: <b>1${ASep}private${PSep}public</b><br>".
"This will translate a '1' in the CSV field to 'privat' and everything else to 'public'.<p>".
"Patterns as well as the replacement can be regular expressions (the replacement is done via ereg_replace). ".
"If, after all replacements, the value starts with an '@' the whole value is eval()'ed, so you ".
"may use all php, phpgw plus your own functions. This is quiet powerfull, but <u>circumvents all ACL</u>. ".
"Therefor this feature is only availible to Adminstrators.<p>".
"Example using regular expressions and '@'-eval(): <br><b>$mktime_lotus</b><br>".
"It will read a date of the form '2001-05-20 08:00:00.00000000000000000' (and many more, see the regular expr.). ".
"The&nbsp;[&nbsp;.:-]-separated fields are read and assigned in different order to @mktime(). Please note to use ".
"${VPre} insted of a backslash (I couldn't get backslash through all the involved templates and forms.) ".
"plus the field-number of the pattern.<p>".
"In addintion to the fields assign by the pattern of the reg.exp. you can use all other CSV-fields, with the ".
"syntax <b>${CPre}CSV-FIELDNAME$CPos</b>. Here is an example: <br>".
"<b>.+$ASep${CPre}Company$CPos: ${CPre}NFamily$CPos, ${CPre}NGiven$CPos$PSep${CPre}NFamily$CPos, ${CPre}NGiven$CPos</b><br>".
"It is used on the CSV-field 'Company' and constructs a something like <i>Company: FamilyName, GivenName</i> or ".
"<i>FamilyName, GivenName</i> if 'Company' is empty.<p>".
"You can use the 'No CSV #'-fields to assign csv-values to more than on field, the following example uses the ".
"csv-field 'Note' (which gots already assingned to the description) and construct a short subject: ".
"<b>@substr(${CPre}Note$CPos,0,60).' ...'</b><p>".
"Their is one important user-function for the Addressbook:<br>".
"<b>@cat_id(Cat1,...,CatN)</b> returns a (','-separated) list with the cat_id's. If a category isn't found, it ".
"will be automaticaly added.<p>".
"I hope that helped to understand the features, if not <a href='mailto:egroupware-users@lists.sf.net'>ask</a>.";
$GLOBALS['egw']->template->set_var('help_on_trans',lang($help_on_trans)); // I don't think anyone will translate this
break;
case 'next':
$_POST['addr_fields'] = unserialize(stripslashes($_POST['addr_fields']));
$_POST['trans'] = unserialize(stripslashes($_POST['trans']));
// fall-through
case 'import':
$hiddenvars = html::input_hidden(array(
'action' => 'continue',
'fieldsep'=> $_POST['fieldsep'],
'charset' => $_POST['charset'],
'start' => $_POST['start']+(!$_POST['debug'] ? $_POST['max'] : 0),
'max' => $_POST['max'],
'debug' => $_POST['debug'],
'addr_fields' => $_POST['addr_fields'],
'trans' => $_POST['trans']
));
@set_time_limit(0);
ini_set('auto_detect_line_endings',true); // to allow to import files created eg. on a mac
$fp=fopen($csvfile,'r');
$csv_fields = fgetcsv($fp,8000,$_POST['fieldsep']);
$csv_fields = $GLOBALS['egw']->translation->convert($csv_fields,$_POST['charset']);
$csv_fields[] = 'no CSV 1'; // eg. for static assignments
$csv_fields[] = 'no CSV 2';
$csv_fields[] = 'no CSV 3';
$addr_fields = array_diff($_POST['addr_fields'],array('')); // throw away empty / not assigned entrys
$defaults = array();
foreach($addr_fields as $csv_idx => $addr)
{ // convert $_POST['trans'][$csv_idx] into array of pattern => value
$defaults[$csv_fields[$csv_idx]] = $addr;
if($_POST['trans'][$csv_idx])
{
$defaults[$csv_fields[$csv_idx]] .= $PSep.$_POST['trans'][$csv_idx];
}
}
$GLOBALS['egw']->preferences->read_repository();
$GLOBALS['egw']->preferences->add('addressbook','cvs_import',$defaults);
$GLOBALS['egw']->preferences->save_repository(True);
$log = '<table border="1" style="border: 1px dotted black; border-collapse: collapse;">'."\n\t<tr><td>#</td>\n";
foreach($addr_fields as $csv_idx => $addr)
{ // convert $_POST['trans'][$csv_idx] into array of pattern => value
// if (!$_POST['debug']) echo "<p>$csv_idx: ".$csv_fields[$csv_idx].": $addr".($_POST['trans'][$csv_idx] ? ': '.$_POST['trans'][$csv_idx] : '')."</p>";
$pat_reps = explode($PSep,stripslashes($_POST['trans'][$csv_idx]));
$replaces = ''; $values = '';
if($pat_reps[0] != '')
{
foreach($pat_reps as $k => $pat_rep)
{
list($pattern,$replace) = explode($ASep,$pat_rep,2);
if($replace == '')
{
$replace = $pattern; $pattern = '^.*$';
}
$values[$pattern] = $replace; // replace two with only one, added by the form
$replaces .= ($replaces != '' ? $PSep : '') . $pattern . $ASep . $replace;
}
$_POST['trans'][$csv_idx] = $values;
}
else
{
unset( $_POST['trans'][$csv_idx] );
}
$log .= "\t\t<td><b>$addr</b></td>\n";
}
if (!in_array('private',$addr_fields)) // autocreate public access if not set by user
{
$log .= "\t\t<td><b>private</b></td>\n";
}
$start = $_POST['start'] < 1 ? 1 : $_POST['start'];
// ignore empty lines, is_null($fields[0]) is returned on empty lines !!!
for($i = 1; $i < $start; ++$i) // overread lines before our start-record
{
while(($fields = fgetcsv($fp,8000,$_POST['fieldsep'])) && is_null($fields[0])) ;
}
for($anz = 0; !$_POST['max'] || $anz < $_POST['max']; ++$anz)
{
while(($fields = fgetcsv($fp,8000,$_POST['fieldsep'])) && is_null($fields[0])) ;
if (!$fields)
{
break; // EOF
}
$fields = $GLOBALS['egw']->translation->convert($fields,$_POST['charset']);
$log .= "\t</tr><tr><td>".($start+$anz)."</td>\n";
$values = $orig = array();
foreach($addr_fields as $csv_idx => $addr)
{
//echo "<p>$csv: $addr".($_POST['trans'][$csv] ? ': '.$_POST['trans'][$csv] : '')."</p>";
$val = $fields[$csv_idx];
if(isset($_POST['trans'][$csv_idx]))
{
$trans_csv = $_POST['trans'][$csv_idx];
while(list($pattern,$replace) = each($trans_csv))
{
if(ereg((string) $pattern,$val))
{
// echo "<p>csv_idx='$csv_idx',info='$addr',trans_csv=".print_r($trans_csv).",ereg_replace('$pattern','$replace','$val') = ";
$val = ereg_replace((string) $pattern,str_replace($VPre,'\\',$replace),(string) $val);
// echo "'$val'</p>";
$reg = $CPreReg.'([a-zA-Z_0-9]+)'.$CPosReg;
while(ereg($reg,$val,$vars))
{ // expand all CSV fields
$val = str_replace($CPre . $vars[1] . $CPos, $val[0] == '@' ? "'"
. addslashes($fields[array_search($vars[1], $csv_fields)])
. "'" : $fields[array_search($vars[1], $csv_fields)], $val);
}
if($val[0] == '@')
{
if (!$GLOBALS['egw_info']['user']['apps']['admin'])
{
echo lang('@-eval() is only availible to admins!!!');
}
else
{
// removing the $ to close security hole of showing vars, which contain eg. passwords
$val = 'return '.substr(str_replace('$','',$val),1).';';
// echo "<p>eval('$val')=";
$val = eval($val);
// echo "'$val'</p>";
}
}
if($pattern[0] != '@' || $val)
{
break;
}
}
}
}
$values[$addr] = $orig[$addr] = $val;
}
$empty = !count($values);
// convert the category name to an id
if ($values['cat_id'])
{
$values['cat_id'] = cat_id($values['cat_id']);
}
// convert dates to timestamps
foreach(array('created','modified') as $date)
{
if (isset($values[$date]) && !is_numeric($date))
{
// convert german DD.MM.YYYY format into ISO YYYY-MM-DD format
$values[$date] = ereg_replace('([0-9]{1,2}).([0-9]{1,2}).([0-9]{4})','\3-\2-\1',$values[$date]);
// remove fractures of seconds if present at the end of the string
if (ereg('(.*)\.[0-9]+',$values[$date],$parts)) $values[$date] = $parts[1];
$values[$date] = strtotime($values[$date]);
}
}
// convert user-names to user-id's
foreach(array('owner','modifier','creator') as $user)
{
if (isset($values[$user]) && !is_numeric($user))
{
if (preg_match('/\[([^\]]+)\]/',$values[$user],$matches)) $values[$user] = $matches[1];
$values[$user] = $GLOBALS['egw']->accounts->name2id($values[$user],'account_lid',$user=='owner'?null:'u');
}
}
if (!in_array('owner',$addr_fields) || !$values['owner'])
{
$values['owner'] = $GLOBALS['egw_info']['user']['account_id'];
}
if (!in_array('private',$addr_fields))
{
$values['private'] = 0; // public access if not set by user
$log .= "\t\t<td>".$values['private']."</td>\n";
}
else
{
$values['private'] = (int) in_array($values['private'],array(lang('yes'),'yes','private','1','true'));
}
foreach(array('adr_one_countryname','adr_two_countryname') as $name)
{
if (strlen($values[$name]) == 2)
{
$values[$name] = $GLOBALS['egw']->country->get_full_name($values[$name]);
}
}
if(!$_POST['debug'] && !$empty) // dont import empty contacts
{
$rvalue=$GLOBALS['egw']->contacts->save($values);
//echo "<p>adding: ".print_r($values,true)."</p>\n";
}
// display read and interpreted results, so the user can check it
foreach($addr_fields as $name)
{
$log .= "\t\t<td>".($orig[$name] != $values[$name] ? htmlspecialchars($orig[$name]).' --> ' : '').
htmlspecialchars($values[$name])."</td>\n";
}
}
$log .= "\t</tr>\n</table>\n";
$GLOBALS['egw']->template->set_var('anz_imported',($_POST['debug'] ?
lang('%1 records read (not yet imported, you may go %2back%3 and uncheck Test Import)',
$anz,'','') :
lang('%1 records imported',$anz)). '&nbsp;'.
(!$_POST['debug'] && $fields ? html::submit_button('next','Import next set') . '&nbsp;':'').
html::submit_button('continue','Back') . '&nbsp;'.
html::submit_button('cancel','Cancel'));
$GLOBALS['egw']->template->set_var('log',$log);
$GLOBALS['egw']->template->parse('importedhandle','imported');
break;
}
// Output the page. The start page sets no hidden vars, so use an empty string there
// instead of reading an undefined variable. '{' is replaced by its html entity, presumably
// to keep the template class from treating it as the start of a template variable
// (NOTE(review): confirm).
$GLOBALS['egw']->template->set_var('hiddenvars',str_replace('{','&#x7B;',isset($hiddenvars) ? $hiddenvars : ''));
$GLOBALS['egw']->template->pfp('out','import',True);
$GLOBALS['egw']->common->egw_footer();