Fix the handling of charsets in the contacts_ldap class and provide admins with a script that helps to fix old, corrupted eGW LDAP addressbooks. For a description of why and when this is necessary, please read the header of the fix-ldap-charset-for-egw1.1.pl script. This should appear in the release notes for eGW 1.1.

2024-12-22 14:41:29 +01:00 · 2005-08-13 21:01:14 +00:00 · 2005-08-13 21:01:14 +00:00 · d61920a22b
commit d61920a22b
parent 8e959382c1
3 changed files with 203 additions and 10 deletions
--- a/phpgwapi/doc/ldap/fix-ldap-charset-for-egw1.1.pl
+++ b/phpgwapi/doc/ldap/fix-ldap-charset-for-egw1.1.pl
@ -0,0 +1,190 @@
+#!/usr/bin/perl -w
+
+use strict;
+use MIME::Base64;
+use Text::Iconv;
+
+ #**************************************************************************
+ #               fix-ldap-charset-for-egw1.1.pl  -  description            *
+ #                             -------------------                         *
+ #    begin                : Mon 2005/08/08                                *
+ #    copyright            : (C) 2005 by Carsten Wolff                     *
+ #    email                : wolffc@egroupware.org                         *
+ #                                                                         *
+ #   This program is free software; you can redistribute it and/or modify  *
+ #   it under the terms of the GNU General Public License as published by  *
+ #   the Free Software Foundation; either version 2 of the License, or     *
+ #   (at your option) any later version.                                   *
+ #                                                                         *
+ #                                                                         *
+ #   This script is used to adapt the charset in an egw ldap addressbook   *
+ #   to the egw code in Release 1.1 and newer.                             *
+ #                                                                         *
+ #                                                                         *
+ #   The old egw code just called utf8_encode on every attribute before    *
+ #   writing and utf8_decode after reading an ldap attribute. This was     *
+ #   fine as long as egw was run in iso8859-1, because then, calling       *
+ #   utf8_encode was a proper conversion.                                  *
+ #   But since egw supported systemcharsets, this call led to strings      *
+ #   being encoded _twice_ before they were sent to ldap and thus being    *
+ #   encoded in some weird mix of 2 charsets.                              *
+ #   This of course confuses other LDAP-clients, because they don't        *
+ #   know about the actual charset of the data anymore.                    *
+ #   The new egw code now correctly _converts_ from every charset to utf8  *
+ #   before sending data to ldap and converts from utf8 to systemcharset   *
+ #   on reading. This of course makes it necessary, to correct the charset *
+ #   of existing entries in the ldap-branch used by egw-addressbook        *
+ #   (i.e. really make them utf-8), before the new code is being used.     *
+ #                                                                         *
+ #   How to use this script:                                               *
+ #   1. make a datadump of your ldap database (f.e. slapcat>data.ldif)     *
+ #   2. configure this script below                                        *
+ #   3. convert the dump (./fix-ldap-charset-for-egw1.1.pl data.ldif)      *
+ #   4. reimport the dump (f.e. slapadd -l data.ldif.conv)                 *
+ #                                                                         *
+ #**************************************************************************
+
+##############################################################################
+# CONFIGURATION - BEGIN
+#
+#
+# only entries below this DN will be converted
+my $basedn = "ou=addressbook,dc=domain,dc=xyz";
+# this is the systemcharset of eGW, that was used at the time
+# when the eGW-Code of your installation still was version 1.0.x or earlier
+my $egw_systemcharset = "utf-8";
+#
+#
+# CONFIGURATION - END
+##############################################################################
+
+
+# parameters
+my $filename = $ARGV[0];
+# check for a missing argument first, so that -f is never applied to undef
+unless (defined $filename and -f $filename) {
+	print "usage: " . $0 . " {ldif-filename}\n";
+	exit 0;
+}
+
+# global objects
+# $iconv_outer converts from utf-8 to iso-8859-1, which takes back the
+# additional utf8_encode of the old egw code (see the header of this script).
+# $iconv_inner then converts the result from the former systemcharset
+# to utf-8.
+my $iconv_outer = Text::Iconv->new("utf-8", "iso-8859-1");
+my $iconv_inner = Text::Iconv->new($egw_systemcharset, "utf-8");
+
+# get an array of all entries
+my $file;
+{
+	local $/;  # slurp mode, limited to this read
+	# 3-argument open: the filename is taken literally, it is neither
+	# searched for a mode nor stripped of whitespace
+	open(my $fold, '<', $filename) || die "error opening source-file: $filename: $!";
+	flock($fold, 2);
+	$file = <$fold>;
+	flock($fold, 8);
+	close($fold);
+}
+$file = "" unless defined $file;
+# the entries of an ldif are separated by an empty line
+my @old = split("\n\n", $file);
+
+# scalar(@old) is the number of entries ($#old is only the last index)
+print "\nRead " . scalar(@old) . " entries from " . $filename . "\n";
+
+# begin with conversion
+my @new = ();
+my $i = 0;
+# the base DN is the same for every entry, so escape it only once
+my $basednregexp = regexpEscape($basedn);
+foreach my $oldentry (@old) {
+	my $workentry = $oldentry;
+	# concatenate folded (multiline) data, f.e. long base64 values
+	$workentry =~ s/\n //g;
+	# extract the raw DN and get its readable form;
+	# a chunk that does not start with a DN leaves %dn empty
+	my %dn = ();
+	if ($workentry =~ /^(dn:[^\n]*)\n/) {
+		%dn = getAttributeValue($1);
+	}
+	# check, if this entry is to be converted:
+	# only entries _below_ the base DN are, everything else
+	# (including chunks without a readable DN) is copied unchanged
+	unless (defined $dn{'value'} and $dn{'value'} =~ /^.+$basednregexp$/) {
+		push(@new, $oldentry . "\n");
+		next;
+	}
+	#
+	# This entry is to be converted
+	#
+	my $newentry = "";
+	my @attributes = split("\n", $workentry);
+	foreach my $attr (@attributes) {
+		my %attrib = getAttributeValue($attr);
+		unless (defined $attrib{'name'} and defined $attrib{'value'}) {
+			# getAttributeValue could not parse this line (and has already
+			# reported it); keep the line as it is, instead of writing
+			# an attribute without name and value
+			$newentry .= $attr . "\n";
+			next;
+		}
+		if (length($attrib{'value'})) {
+			# convert() returns undef, if the value can not be converted
+			# (f.e. binary data or values, that never were encoded twice).
+			# Such values are kept unchanged, instead of being emptied.
+			my $decoded = $iconv_outer->convert($attrib{'value'});
+			my $converted = defined $decoded ? $iconv_inner->convert($decoded) : undef;
+			if (defined $converted) {
+				$attrib{'value'} = $converted;
+			} else {
+				print "Warning: could not convert attribute " . $attrib{'name'} . " of " . $dn{'value'} . ", keeping the original value\n";
+			}
+		}
+		$newentry .= attrib2ldif(\%attrib);
+	}
+	push(@new,$newentry);
+	$i++;
+}
+print "Converted $i entries in $basedn\n";
+
+# write the result
+open(my $fnew, '>', $filename . ".conv") || die "error opening destination-file: $filename" . ".conv: $!";
+flock($fnew, 2);
+foreach my $entry (@new) {
+	# every entry already ends with a newline, the second one
+	# is the empty line, that separates the entries
+	print {$fnew} $entry . "\n";
+}
+flock($fnew, 8);
+# buffered write errors (f.e. disk full) only show up on close
+close($fnew) || die "error writing destination-file: $filename" . ".conv: $!";
+
+print "Wrote " . scalar(@new) . " entries to $filename.conv\n\nPlease check the number of entries and have a look at\n$filename.conv, before reimporting it.\n\n";
+
+#####################
+# Subroutines
+#####################
+
+# break down an attribute line in attribute-name and value
+# if the value is base64 ("name:: value"), decode it.
+# RFC 2849 allows any number of spaces (including none) after the colon(s),
+# so these are not treated as part of the value.
+# parameter: one unfolded line of an ldif entry
+# returns: a hash with the keys 'name' and 'value'; if the line can not
+#          be parsed, an error is printed and an empty hash is returned
+sub getAttributeValue {
+	my ($rawattr) = @_;
+	my %attr = ();
+	# treat a missing argument like a line that can not be parsed,
+	# instead of matching against undef
+	$rawattr = "" unless defined $rawattr;
+	if ($rawattr =~ /^([^:]*):: *(.*)/) {
+		$attr{'name'} = $1;
+		$attr{'value'} = decode_base64($2);
+	} elsif ($rawattr =~ /^([^:]*):(?!<) *(.*)/) {
+		# "name:< url" refers to an external value and is not a plain
+		# value, so it is excluded here and reported below
+		$attr{'name'} = $1;
+		$attr{'value'} = $2;
+	} else {
+		print "Error extracting data from attribute: " . $rawattr . "\n";
+	}
+	return %attr;
+}
+
+# escape a string for use within a regexp
+# Every character with a special meaning in a regexp gets a backslash
+# prepended. This includes the backslash itself, which shows up in DNs
+# that contain escaped characters (f.e. "cn=Doe\, John").
+# parameter: the string to be matched literally
+# returns: the escaped string
+sub regexpEscape {
+	my ($string) = @_;
+	$string =~ s/([\\\^\.\$\|\(\)\[\]\*\+\?\{\}])/\\$1/g;
+	return $string;
+}
+
+# change an attribute in suitable form for an ldif
+# parameter: reference to a hash with the keys 'name' and 'value'
+# returns: the ldif representation of the attribute ("name: value" or,
+#          base64 encoded and folded, "name:: value"), always terminated
+#          by a newline
+sub attrib2ldif {
+	my ($attrib) = @_;
+	my ($key, $value) = ($attrib->{'name'}, $attrib->{'value'});
+	# RFC2849 requires a string to be BASE64 encoded, if
+	# - it begins with a char that's not a SAFE-INIT-CHAR
+	# - or it contains a char that's not a SAFE-CHAR
+	# and recommends it for strings that end with a space
+	if ($value =~ /^[: <]/ or $value =~ / \z/ or $value =~ /[^\x01-\x09\x0b-\x0c\x0e-\x7f]/) {
+		# email-addresses can not contain unicode-characters
+		if ($key eq "mail" or $key eq "phpgwMailHome") {
+			print "Warning: forbidden characters in eMail-address detected: " . $value . "\n";
+		}
+		# the empty second argument makes encode_base64 return one single line
+		$value = encode_base64($value, "");
+		# each line has to be no more than 77 characters long
+		# including a leading space and, on the first line, the key.
+		# Exceptions: dn and rdn
+		unless ($key eq "dn" or $key eq "rdn") {
+			# the first line also holds the key followed by ":: "
+			my $firstlen = 77 - (length($key) + 3);
+			# a key, that fills the first line on its own, leaves no room there
+			$firstlen = 0 if $firstlen < 0;
+			my $form = substr($value, 0, $firstlen);
+			# only add a continuation line, while there is data left; this
+			# avoids a trailing line, that consists of a single space, if
+			# the rest of the data is an exact multiple of 76 characters
+			my $pos = $firstlen;
+			while ($pos < length($value)) {
+				$form .= "\n " . substr($value, $pos, 76);
+				$pos += 76;
+			}
+			$value = $form;
+		}
+		$key = $key . ":: ";
+	} else {
+		$key = $key . ": ";
+	}
+	return $key . $value . "\n";
+}
--- a/phpgwapi/inc/class.common.inc.php
+++ b/phpgwapi/inc/class.common.inc.php
@ -263,7 +263,7 @@
 		Escaped Characters are: '*', '(', ')', ' ', '\', NUL
 		It's actually a PHP-Bug, that we have to escape space.
 		For all other Characters, refer to RFC2254.
-		@param $string string to be escaped
+		@param $string either a string to be escaped, or an array of values to be escaped
 		*/
 		function ldap_addslashes($string='')
 		{
--- a/phpgwapi/inc/class.contacts_ldap.inc.php
+++ b/phpgwapi/inc/class.contacts_ldap.inc.php
@ -199,7 +199,7 @@
 			{
 				foreach($stock_fieldnames as $name => $value)
 				{
-					$return_fields[0][$name] = utf8_decode($ldap_fields[0][$value][0]);
+					$return_fields[0][$name] = $GLOBALS['phpgw']->translation->convert(($ldap_fields[0][$value][0]),'utf-8');
 				}
 			}

@ -271,7 +271,7 @@
 			{
 				foreach($stock_fieldnames as $name => $value)
 				{
-					$return_fields[0][$name] = utf8_decode($ldap_fields[0][$value][0]);
+					$return_fields[0][$name] = $GLOBALS['phpgw']->translation->convert(($ldap_fields[0][$value][0]),'utf-8');
 				}
 			}

@ -415,7 +415,7 @@
 				if(is_array($query))
 				{
 					// must be fixed somehow Milosch????
-					$myfilter = $this->makefilter($filterfields,$query,'',$DEBUG);
+					$myfilter = $this->makefilter($filterfields,$GLOBALS['phpgw']->common->ldap_addslashes($query),'',$DEBUG);
 				}
 				else
 				{
@ -428,13 +428,14 @@
 						'org_name'	=> 'o',
 						'org_unit'	=> 'ou'
 					);
-					$myfilter = $this->makefilter($filterfields,$search_filter,$query,$DEBUG);
+					$myfilter = $this->makefilter($filterfields,$search_filter,$GLOBALS['phpgw']->common->ldap_addslashes($query),$DEBUG);
 				}
 			}
 			else
 			{
 				$myfilter = $this->makefilter($filterfields,'','',$DEBUG);
 			}
+			$myfilter = $GLOBALS['phpgw']->translation->convert($myfilter,$GLOBALS['phpgw']->translation->system_charset,'utf-8');

 			$sri = ldap_search($this->ldap, $GLOBALS['phpgw_info']['server']['ldap_contact_context'], $myfilter);

@ -498,7 +499,7 @@
 					{
 						foreach($stock_fieldnames as $f_name => $f_value)
 						{
-							$return_fields[$j][$f_name] = utf8_decode($ldap_fields[$i][$f_value][0]);
+							$return_fields[$j][$f_name] = $GLOBALS['phpgw']->translation->convert(($ldap_fields[$i][$f_value][0]),'utf-8');
 						}
 					}
 					$this->db->query("SELECT contact_name,contact_value FROM $this->ext_table WHERE contact_id='"
@ -525,7 +526,9 @@
 			}

 			$first = $last = "*";
-			if(strstr($query,"*"))
+			// this can only be the case, if $query is a character-query.
+			// normal queries don't allow wildcards and escape them
+			if(strstr($query,"*") && !strstr($query,"\*"))
 			{
 				if(substr($query,-1) == "*")
 				{
@ -697,7 +700,7 @@
 				{
 					if($stock_fields[$name] != '')
 					{
-						$ldap_fields[$value] = utf8_encode($stock_fields[$name]);
+						$ldap_fields[$value] = $GLOBALS['phpgw']->translation->convert($stock_fields[$name],$GLOBALS['phpgw']->translation->system_charset,'utf-8');
 					}
 				}
 			}
@ -945,12 +948,12 @@
 						if($ldap_fields[0][$fvalue] && $stock_fields[$fname] && $ldap_fields[0][$fvalue][0] != $stock_fields[$fname] )
 						{
 							//echo "<br>".$fname." => ".$fvalue." was there";
-							$err = ldap_modify($this->ldap,$dn,array($fvalue => utf8_encode($stock_fields[$fname])));
+							$err = ldap_modify($this->ldap,$dn,array($fvalue => $GLOBALS['phpgw']->translation->convert($stock_fields[$fname],$GLOBALS['phpgw']->translation->system_charset,'utf-8')));
 						}
 						elseif(!$ldap_fields[0][$fvalue] && $stock_fields[$fname])
 						{
 							//echo "<br>".$fname." not there - '".$fvalue."'";
-							$err = ldap_mod_add($this->ldap,$dn,array($fvalue => utf8_encode($stock_fields[$fname])));
+							$err = ldap_mod_add($this->ldap,$dn,array($fvalue => $GLOBALS['phpgw']->translation->convert($stock_fields[$fname],$GLOBALS['phpgw']->translation->system_charset,'utf-8')));
 						}
 						elseif($ldap_fields[0][$fvalue] && !$stock_fields[$fname])
 						{