remove ancient ldap charset fix script

2016-04-07 07:26:50 +00:00 · 2016-04-07 07:26:50 +00:00 · 52247f10b5
commit 52247f10b5
parent a2134f0f8f
1 changed files with 0 additions and 190 deletions
--- a/doc/ldap/fix-ldap-charset-for-egw1.1.pl
+++ b/doc/ldap/fix-ldap-charset-for-egw1.1.pl
@ -1,190 +0,0 @@
 #!/usr/bin/perl -w
 use strict;
 use MIME::Base64;
 use Text::Iconv;
 #**************************************************************************
 #               fix-ldap-charset-for-egw1.1.pl  -  description            *
 #                             -------------------                         *
 #    begin                : Mon 2005/08/08                                *
 #    copyright            : (C) 2005 by Carsten Wolff                     *
 #    email                : wolffc@egroupware.org                         *
 #                                                                         *
 #   This program is free software; you can redistribute it and/or modify  *
 #   it under the terms of the GNU General Public License as published by  *
 #   the Free Software Foundation; either version 2 of the License, or     *
 #   (at your option) any later version.                                   *
 #                                                                         *
 #                                                                         *
 #   This script is used to adapt the charset in an egw ldap addressbook   *
 #   to the egw code in Release 1.1 and newer.                             *
 #                                                                         *
 #                                                                         *
 #   The old egw code just called utf8_encode on every attribute before    *
 #   writing and utf8_decode after reading an ldap attribute. This was     *
 #   fine as long as egw was run in iso8859-1, because then, calling       *
 #   utf8_encode was a proper conversion.                                  *
 #   But since egw supported systemcharsets, this call led to strings      *
 #   being encoded _twice_ before they were sent to ldap and thus being    *
 #   encoded in some weired mix of 2 charsets.                             *
 #   This of course confuses other LDAP-clients, because they don't        *
 #   know about the actual charset of the data anymore.                    *
 #   The new egw code now correctly _converts_ from every charset to utf8  *
 #   before sending data to ldap and converts from utf8 to systemcharset   *
 #   on reading. This of course makes it necessary, to correct the charset *
 #   of existing entries in the ldap-branch used by egw-addressbook        *
 #   (i.e. really make them utf-8), before the new code is being used.     *
 #                                                                         *
 #   How to use this script:                                               *
 #   1. make a datadump of your ldap database (f.e. slapcat>data.ldif)     *
 #   2. configure this script below                                        *
 #   3. convert the dump (./fix-ldap-charset-for-egw1.1.pl data.ldif)      *
 #   4. reimport the dump (f.e. slapadd -l data.ldif.conv)                 *
 #                                                                         *
 #**************************************************************************
 ##############################################################################
 # CONFIGURATION - BEGIN
 #
 #
 # only entries below this DN will be converted
 my $basedn = "ou=addressbook,dc=domain,dc=xyz";
 # this is the systemcharset of eGW, that was used at the time
 # when the eGW-Code of your installation still was version 1.0.x or earlier
 my $egw_systemcharset = "utf-8";
 #
 #
 # CONFIGURATION - END
 ##############################################################################
 # parameters
 my $filename = $ARGV[0];
 unless (-f $filename) {
 	print "usage: " . $0 . " {ldif-filename}\n";
 	exit 0;
 }
 # global objects
 my $iconv_outer = Text::Iconv->new("utf-8", "iso-8859-1");
 my $iconv_inner = Text::Iconv->new($egw_systemcharset, "utf-8");
 # get an array of all entries
 local $/;  # slurp mode
 open(FOLD, "< $filename\0") || die "error opening source-file: $filename: $!";
 flock(FOLD, 2);
 my $file = <FOLD>;
 my @old = split("\n\n",$file);
 flock(FOLD, 8);
 close(FOLD);
 print "\nRead " . $#old . " entries from " . $filename . "\n";
 # begin with conversion
 my @new = ();
 my $i = 0;
 foreach my $oldentry (@old) {
 	my $workentry = $oldentry;
 	# concatenate base64 multline data
 	$workentry =~ s/\n //g;
 	# extract the raw DN and get it's readable form
 	$workentry =~ /^(dn:[^\n]*)\n/;
 	my %dn = getAttributeValue($1);
 	# check, if this entry is to be converted
 	my $basednregexp = regexpEscape($basedn);
 	unless ($dn{'value'} =~ /^.+$basednregexp$/) {
 		push(@new, $oldentry . "\n");
 		next;
 	}
 	#
 	# This entry is to be converted
 	#
 	my $newentry = "";
 	my @attributes = split("\n", $workentry);
 	foreach my $attr (@attributes) {
 		my %attrib = getAttributeValue($attr);
 		$attrib{'value'} = $iconv_inner->convert($iconv_outer->convert($attrib{'value'}));
 		$newentry .= attrib2ldif(\%attrib);
 	}
 	push(@new,$newentry);
 	$i++;
 }
 print "Converted $i entries in $basedn\n";
 # write the result
 open(FNEW, "> $filename" . ".conv\0") || die "error opening destination-file: $filename" . ".conv: $!";
 flock(FNEW, 2);
 foreach(@new) {
 	print FNEW $_ . "\n";
 }
 flock(FNEW, 8);
 close(FNEW);
 print "Wrote $#new entries to $filename.conv\n\nPlease check the number of entries and have a look at\n$filename.conv, before reimporting it.\n\n";
 #####################
 # Subroutines
 #####################
 # break down an attribute in attribute-name and value
 # if the value is base64, decode it.
 sub getAttributeValue {
 	my ($rawattr) = @_;
 	my %attr = ();
 	if ($rawattr =~ /^([^:]*):: (.*)/) {
 		$attr{'name'} = $1;
 		$attr{'value'} = decode_base64($2);
 	} elsif ($rawattr =~ /^([^:]*): (.*)/) {
 		$attr{'name'} = $1;
 		$attr{'value'} = $2;
 	} else {
 		print "Error extracting data from attribute: " . $rawattr . "\n";
 	}
 	return %attr;
 }
 # escape a string for use within a regexp
 sub regexpEscape {
 	my ($string) = @_;
 	$string =~ s/([\^\.\$\|\(\)\[\]\*\+\?\{\}])/\\$1/g;
 	return $string;
 }
 # cahnge an attribute in suitable form for an ldif
 sub attrib2ldif {
 	my ($attrib) = @_;
 	my ($key, $value) = ($attrib->{'name'}, $attrib->{'value'});
 	# RFC2894 requires a string to be BASE64 encoded, if
 	# - it begins with a char that's not a SAFE-INIT-CHAR
 	# - or it contains a char that's not a SAFE-CHAR
 	if ($value =~ /^[: <]/ or $value =~ /[^\x01-\x09\x0b-\x0c\x0e-\x7f]/) {
 		# email-addresses can not contain unicode-characters
 		if ($key eq "mail" or $key eq "phpgwMailHome") {
 			print "Warning: forbidden characters in eMail-address detected: " . $value . "\n";
 		}
 		$value = encode_base64($value);
 		$value =~ s/\n//g;
 		# each line has to be no more than 77 characters long
 		# including a leading space and, on the first line, the key.
 		# Exceptions: dn and rdn
 		unless ($key eq "dn" or $key eq "rdn") {
 			my $keylen = length($key) + 3;
 			my $form = substr($value, 0, 77 - $keylen);
 			unless ($form eq $value) {
 				my $j = 0;
 				my $next = "";
 				do  {
 					$next = substr($value, 77 - $keylen + $j * 76, 76);
 					$form .= "\n " . $next;
 					$j++;
 				} until (length($next) < 76);
 			}
 			$value = $form;
 		}
 		$key = $key . ":: ";
 	} else {
 		$key = $key . ": ";
 	}
 	return $key . $value . "\n";
 }