syncevolution/src/normalize_vcard.pl

#! /usr/bin/perl -w

use strict;

# Print a short help text describing the two invocation modes
# (normalize a single input, or compare two files) to the
# currently selected output handle.
sub Usage {
  my @help = (
    "normalize_vcard <vcards.vcf\n",
    "                 vcards1.vcf vcards2.vcf\n\n",
    "Either normalizes one file (stdin or single argument)\n",
    "or compares the two files.\n",
  );
  print @help;
}

# Normalize a stream of vCard/iCalendar items so that two exports of the
# same data can be compared textually.
#
# Parameters:
#   $in  - input handle; it is read to the end
#   $out - output handle that receives the normalized text
#
# Items in the input are expected to be separated by blank lines.
# BEGIN/END wrapper lines, VERSION and UID lines are dropped, a number of
# parameters that do not matter for the comparison are removed, the
# parameters of each property are sorted, the properties of each item are
# sorted (N:/SUMMARY: first), long lines are folded after 60 characters,
# and finally the items themselves are sorted and written to $out
# separated by blank lines.
#
# The caller's $_ is not modified. Returns the result of the final print.
sub Normalize {
  my $in = shift;
  my $out = shift;

  # Read everything into a lexical instead of the global $_ so that the
  # caller's $_ survives the call.
  my $text = join( "", grep( !/^(BEGIN:VCARD|BEGIN:VCALENDAR|VERSION|END:VCARD|END:VCALENDAR|UID:)/, <$in> ) );
  $text =~ s/\r//g;

  my @cards = ();

  # foreach localizes $_ for the duration of the loop; all substitutions
  # below operate on the current item through $_.
  foreach ( split( /\n\n/, $text ) ) {
    # undo line continuation
    s/\n\s//gs;
    # ignore charset specifications, assume UTF-8
    s/;CHARSET="UTF-8"//g;

    # ignore extra email type
    s/^EMAIL(.*);TYPE=INTERNET/EMAIL$1/mg;
    s/^EMAIL(.*);TYPE=OTHER/EMAIL$1/mg;
    # ignore extra ADR type
    s/^ADR;TYPE=OTHER/ADR/mg;
    # ignore TYPE=PREF in address, does not matter in Evolution
    s/^((ADR|LABEL)[^:]*);TYPE=PREF/$1/mg;
    # ignore extra separators in multi-value fields
    s/^((ORG|N|(ADR[^:]*)):.*?);*$/$1/mg;
    # the type of certain fields is ignored by Evolution
    s/^X-(AIM|GROUPWISE|ICQ|YAHOO);TYPE=HOME/X-$1/gm;
    # TYPE=VOICE is the default in Evolution and may or may not appear in the vcard
    s/^TEL([^:]*);TYPE=VOICE([^:]*):/TEL$1$2:/mg;
    # don't care about the TYPE property of PHOTOs; [^:]* keeps the match
    # inside the parameter list so that a "TYPE=" occurring in the value
    # (for example in a URL) is never touched
    s/^PHOTO;([^:]*)TYPE=[A-Z]*/PHOTO;$1/mg;
    # encoding is not case sensitive, skip white space in the middle of binary data
    if (s/^PHOTO;.*?ENCODING=(b|B|BASE64).*?:\s*/PHOTO;ENCODING=B: /mgi) {
      while (s/^PHOTO(.*?): (\S+)[\t ]+(\S+)/PHOTO$1: $2$3/mg) {}
    }

    # remove extra timezone specification, it is not supported
    # by SyncEvolution
    s/BEGIN:VTIMEZONE.*?END:VTIMEZONE\r?\n?//gs;
    # remove fields which may differ
    s/^(PRODID|CREATED|LAST-MODIFIED):.*\r?\n?//gm;
    # remove optional fields
    s/^(METHOD):.*\r?\n?//gm;

    # replace parameters with a sorted parameter list
    s!^([^;:]*);(.*?):!$1 . ";" . join(';',sort(split(/;/, $2))) . ":"!meg;

    # sort entries, putting "N:" resp. "SUMMARY:" first
    my @lines = split( "\n" );
    my @ordered = ( grep( /^(N|SUMMARY):/, @lines ),
                    sort( grep( !/^(N|SUMMARY):/, @lines ) ) );

    # then fold entries so that they cover not more than 60 characters
    # per line (continuation lines start with a space)
    s/(.{60})/$1\n /g foreach @ordered;

    push @cards, join( "\n", @ordered );
  }

  print {$out} join( "\n\n", sort @cards ), "\n";
}

# Command-line driver:
#   more than two arguments - print the usage text and exit with status 1
#   exactly two arguments   - normalize both files into temporary files and
#                             show their differences with diff(1); the exit
#                             status is 0 if diff reports success, 1 otherwise
#   zero or one argument    - normalize the given file (or standard input)
#                             to standard output

# Open a path for reading and return the handle; dies with
# "<path>: <error>" on failure. The three-argument open keeps characters
# such as '>' or '|' in a file name from being interpreted as an open
# mode. "-" still yields standard input, as it does with the
# two-argument form of open.
my $open_input = sub {
  my $path = shift;
  return *STDIN{IO} if $path eq '-';
  open(my $fh, '<', $path) || die "$path: $!";
  return $fh;
};

if($#ARGV > 1) {
  # error
  Usage();
  exit 1;
} elsif($#ARGV == 1) {
  # comparison
  require File::Temp;

  my ($file1, $file2) = ($ARGV[0], $ARGV[1]);

  # open the inputs first so that a missing input file is reported
  # before any temporary file has been created
  my $in1 = $open_input->($file1);
  my $in2 = $open_input->($file2);

  # tempfile() creates and opens each file in one step and dies on
  # failure; UNLINK => 1 removes the files at program exit even when
  # the script dies before reaching the explicit unlink below
  my ($out1, $normal1) = File::Temp::tempfile( UNLINK => 1 );
  my ($out2, $normal2) = File::Temp::tempfile( UNLINK => 1 );

  Normalize($in1, $out1);
  Normalize($in2, $out2);
  close($in1);
  close($in2);
  # buffered write errors only show up when the handle is closed
  close($out1) || die "$normal1: $!";
  close($out2) || die "$normal2: $!";

  my $res = system( "diff", "--suppress-common-lines", "-y", $normal1, $normal2 );

  unlink($normal1);
  unlink($normal2);
  exit($res ? 1 : 0);
} else {
  # normalize
  my $in;
  if( $#ARGV >= 0 ) {
    $in = $open_input->($ARGV[0]);
  } else {
    $in = *STDIN{IO};
  }

  Normalize($in, *STDOUT{IO});
}
added testing of syncing git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@12 15ad00c4-1369-45f4-8270-35d70d36bdcd 2005-12-10 20:16:02 +01:00			`#! /usr/bin/perl -w`

			`use strict;`

normalize_vcard can now also compare two files automatically; it's also installed without .pl suffix git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@43 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-15 23:09:04 +01:00			`sub Usage {`
			`print "normalize_vcard <vcards.vcf\n";`
			`print " vcards1.vcf vcards2.vcf\n\n";`
			`print "Either normalizes one file (stdin or single argument)\n";`
			`print "or compares the two files.\n";`
added testing of syncing git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@12 15ad00c4-1369-45f4-8270-35d70d36bdcd 2005-12-10 20:16:02 +01:00			`}`

normalize_vcard can now also compare two files automatically; it's also installed without .pl suffix git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@43 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-15 23:09:04 +01:00			`sub Normalize {`
			`my $in = shift;`
			`my $out = shift;`

now also handles calendar comparison git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@70 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-04-17 11:37:43 +02:00			`$_ = join( "", grep( !/^(BEGIN:VCARD\|BEGIN:VCALENDAR\|VERSION\|END:VCARD\|END:VCALENDAR\|UID:)/, <$in> ) );`
normalize_vcard can now also compare two files automatically; it's also installed without .pl suffix git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@43 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-15 23:09:04 +01:00			`s/\r//g;`

			`my @cards = ();`

			`foreach $_ ( split( /\n\n/ ) ) {`
			`# undo line continuation`
			`s/\n\s//gs;`
			`# ignore charset specifications, assume UTF-8`
			`s/;CHARSET="UTF-8"//g;`
now also handles calendar comparison git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@70 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-04-17 11:37:43 +02:00
normalize_vcard can now also compare two files automatically; it's also installed without .pl suffix git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@43 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-15 23:09:04 +01:00			`# ignore extra email type`
fixed the fix of EMAIL normalization... git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@84 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-04-24 19:32:46 +02:00			`s/^EMAIL(.*);TYPE=INTERNET/EMAIL$1/mg;`
			`s/^EMAIL(.*);TYPE=OTHER/EMAIL$1/mg;`
normalize_vcard can now also compare two files automatically; it's also installed without .pl suffix git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@43 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-15 23:09:04 +01:00			`# ignore extra ADR type`
ignore some more irrelevant vcard differences (TYPE=PREF, extra separators) git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@55 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-19 10:54:53 +01:00			`s/^ADR;TYPE=OTHER/ADR/mg;`
			`# ignore TYPE=PREF in address, does not matter in Evolution`
			`s/^((ADR\|LABEL)[^:]*);TYPE=PREF/$1/mg;`
			`# ignore extra separators in multi-value fields`
			`s/^((ORG\|N\|(ADR[^:])):.?);*$/$1/mg;`
normalize_vcard can now also compare two files automatically; it's also installed without .pl suffix git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@43 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-15 23:09:04 +01:00			`# the type of certain fields is ignore by Evolution`
ignore some more irrelevant vcard differences (TYPE=PREF, extra separators) git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@55 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-19 10:54:53 +01:00			`s/^X-(AIM\|GROUPWISE\|ICQ\|YAHOO);TYPE=HOME/X-$1/gm;`
normalize_vcard can now also compare two files automatically; it's also installed without .pl suffix git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@43 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-15 23:09:04 +01:00			`# TYPE=VOICE is the default in Evolution and may or may not appear in the vcard`
			`s/^TEL([^:]);TYPE=VOICE([^:]):/TEL$1$2:/mg;`
hide differences due to different PHOTO parameters, fixed EMAIL;INTERNET;OTHER git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@83 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-04-24 18:37:01 +02:00			`# don't care about the TYPE property of PHOTOs`
			`s/^PHOTO;(.)TYPE=[A-Z]/PHOTO;$1/mg;`
added support for testing against www.scheduleworld.com git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@85 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-04-24 20:49:03 +02:00			`# encoding is not case sensitive, skip white space in the middle of binary data`
			`if (s/^PHOTO;.?ENCODING=(b\|B\|BASE64).?:\s*/PHOTO;ENCODING=B: /mgi) {`
avoid eating the trailing newline after PHOTO git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@86 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-04-24 22:00:18 +02:00			`while (s/^PHOTO(.*?): (\S+)[\t ]+(\S+)/PHOTO$1: $2$3/mg) {}`
added support for testing against www.scheduleworld.com git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@85 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-04-24 20:49:03 +02:00			`}`
now also handles calendar comparison git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@70 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-04-17 11:37:43 +02:00
			`# remove extra timezone specification, it is not supported`
			`# by SyncEvolution`
			`s/BEGIN:VTIMEZONE.*?END:VTIMEZONE\r?\n?//gs;`
			`# remove fields which may differ`
			`s/^(PRODID\|CREATED\|LAST-MODIFIED):.*\r?\n?//gm;`
			`# remove optional fields`
			`s/^(METHOD):.*\r?\n?//gm;`

normalize_vcard can now also compare two files automatically; it's also installed without .pl suffix git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@43 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-15 23:09:04 +01:00			`# replace parameters with a sorted parameter list`
			`s!^([^;:]);(.?):!$1 . ";" . join(';',sort(split(/;/, $2))) . ":"!meg;`


now also handles calendar comparison git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@70 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-04-17 11:37:43 +02:00			`# sort entries, putting "N:" resp. "SUMMARY:" first`
format normalized vcards with 60 columns for easier side-by-side diff git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@44 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-18 14:29:15 +01:00			`# then modify entries to cover not more than`
			`# 60 characters`
normalize_vcard can now also compare two files automatically; it's also installed without .pl suffix git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@43 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-15 23:09:04 +01:00			`my @lines = split( "\n" );`
format normalized vcards with 60 columns for easier side-by-side diff git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@44 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-18 14:29:15 +01:00			`push @cards, join( "\n",`
			`grep( ( s/(.{60})/$1\n /g \|\| 1),`
now also handles calendar comparison git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@70 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-04-17 11:37:43 +02:00			`grep( /^(N\|SUMMARY):/, @lines ),`
			`sort( grep ( !/^(N\|SUMMARY):/, @lines ) ) ) );`
normalize_vcard can now also compare two files automatically; it's also installed without .pl suffix git-svn-id: https://zeitsenke.de/svn/SyncEvolution/trunk@43 15ad00c4-1369-45f4-8270-35d70d36bdcd 2006-03-15 23:09:04 +01:00			`}`

			`print $out join( "\n\n", sort @cards ), "\n";`
			`}`

			`if($#ARGV > 1) {`
			`# error`
			`Usage();`
			`exit 1;`
			`} elsif($#ARGV == 1) {`
			`# comparison`

			`my ($file1, $file2) = ($ARGV[0], $ARGV[1]);`
			my $normal1 = `mktemp`;
			my $normal2 = `mktemp`;
			`chomp($normal1);`
			`chomp($normal2);`

			`open(IN1, "<$file1") \|\| die "$file1: $!";`
			`open(IN2, "<$file2") \|\| die "$file2: $!";`
			`open(OUT1, ">$normal1") \|\| die "$normal1: $!";`
			`open(OUT2, ">$normal2") \|\| die "$normal2: $!";`
			`Normalize(IN1{IO}, OUT1{IO});`
			`Normalize(IN2{IO}, OUT2{IO});`
			`close(IN1);`
			`close(IN2);`
			`close(OUT1);`
			`close(OUT2);`

			`my $res = system( "diff", "--suppress-common-lines", "-y", $normal1, $normal2 );`

			`unlink($normal1);`
			`unlink($normal2);`
			`exit($res ? 1 : 0);`
			`} else {`
			`# normalize`
			`my $in;`
			`if( $#ARGV >= 0 ) {`
			`open(IN, "<$ARGV[0]") \|\| die "$ARGV[0]: $!";`
			`$in = *IN{IO};`
			`} else {`
			`$in = *STDIN{IO};`
			`}`

			`Normalize($in, *STDOUT{IO});`
			`}`