#!/usr/bin/perl -w

# Thread a bunch of mail messages
# Based on jwz's threading algorithm
# Created: Waider August 29 2000
# Modified:
# Waider |2001/03/14| clean up and comment code, add HTMLifyer
# Waider |2001/03/15| tweak threading algorithm, date sorting
# Waider |2001/03/20| adding navigation
# Waider |2001/03/24| discarded nav, trying to improve threading further

# -----------------------------------------------------------------------------
package Message;

use Date::Parse;

# Message object, consisting of:
#  author
#  date
#  subject
#  msgid
#  references/in-reply-to
#  reference to message itself

# Constructor
sub new {
  my ( $type, $subject, $id, $msg ) = @_;

  my $self = bless {}, $type;

  $self->{'subject'} = $subject;
  $self->{'message_id'} = $id;
  $self->{'message'} = $msg;

  $self;
}

# Author accessor (read/write)
sub author {
  my ( $self, $auth ) = @_;

  if ( defined( $auth )) {
	$self->{'author'} = $auth;
  }

  $self->{'author'};
}

# Date accessor (read/write)
sub date {
  my ( $self, $date ) = @_;

  if ( defined( $date )) {
	$self->{'date'} = str2time( $date ) || die "Can't parse $date\n";
  }

  $self->{'date'};
}

# Subject accessor (read-only)
sub subject {
  my $self = shift;
  $self->{'subject'};
}

# Message Id accessor (read-only)
sub message_id {
  my $self = shift;
  $self->{'message_id'};
}

# References accessor (read-only)
sub references {
  my $self = shift;
  $self->{'references'};
}

# Add a reference
sub add_reference {
  my ( $self, $ref ) = @_;
  $self->{'references'} = push @{$self->{'references'}}, $ref;
}

# Message accessor (read/write)
sub message {
  my ( $self, $msg ) = @_;

  if ( defined( $msg )) {
 	if ( defined( $self->{'message'})) {
  	  die "Duplicate message ids";
  	} else {
  	  $self->{'message'} = $msg;
  	}
  }

  $self->{'message'};
}

# -----------------------------------------------------------------------------
package Container;

# Container object, consisting of:
#   message (Message object)
#   parent (Container object)
#   children (array of Container objects)
#   date (date of message or youngest child)
sub new {
  my ( $type, $msg ) = @_;

  my $self = bless {}, $type;
  $self->message( $msg );

  $self;
}

# Message accesor (read/write)
sub message {
  my ( $self, $msg ) = @_;

  if ( defined( $msg )) {
	$self->{'message'} = $msg;
	$self->date( $msg->date );
  }
  $self->{'message'};
}

# Message ID accessor (read-only)
sub message_id {
  my ( $self ) = @_;
  if ( !defined( $self->{'message'})) {
	return undef;
  }
  return $self->{'message'}->message_id;
}

# Date accesor (read/write)
sub date {
  my ( $self, $date ) = @_;

  if ( defined( $date )) {
	$self->{'date'} = $date
	  if ( !defined( $self->{'date' }) || ( $date < $self->{'date'}));
  }
  $self->{'date'};
}

# Parent accessor (read/write)
sub parent {
  my ( $self, $cont ) = @_;
  if ( defined( $cont )) {
	$self->{'parent'} = $cont;
	if ( ref( $cont ) eq "Container" ) {
	  $cont->date( $self->{'date'} );
	}
  }


  $self->{'parent'} || "";
}

# Child accessor (read/write)
sub child {
  my ( $self, $cont ) = @_;
  my $cref = $self->{'child'};
  my @cref;

  if ( defined( $cont )) {
	if ( defined( $cref )) {
	  @cref = @{$cref};
	}

	if ( !$self->is_child( $cont )) { # hmm. I maybe shouldn't need this.
	  push @cref, $cont;
	  $self->{'child'} = \@cref;
	}
  }

  if ( defined( $cref )) {
	@{$cref};
  } else {
	@cref;
  }
}

# Is the specified container a child of this container?
sub is_child {
  my ( $self, $cont ) = @_;
  my $cref = $self->{'child'};

  if ( defined( $cref )) {
	for my $c ( @{$cref} ) {
	  if ( $c eq $cont ) {
		return 1;
	  }
	}
  }

  return 0;
}

# Return a string representing the Container:
# message id if available,
# [no message id] if no message id
# [empty container] if no message
sub as_string {
  my ( $self ) = @_;

  if ( defined( $self->message )) {
	if ( defined( $self->message->message_id )) {
	  return $self->message->message_id;
	} else {
	  return "[no message id]";
	}
  } else {
	return "[empty container]";
  }
}

# Delete the specified child
sub delete_child {
  my ( $self, $cont ) = @_;
  my $cref = $self->{'child'};

  if ( defined( $cref )) {
	my @cref;
	for my $c ( @{$cref} ) {
	  if ( $c ne $cont ) {
		push @cref, $c;
	  }
	}
	$self->{'child'} = \@cref; # store
  }
}

# ------------------------------

package main;

my %id_table;
my @roots;

# regexp for pruning/matching RE: DSPsrv: etc. AW is german, I
# think. FW is for forwards.
my $re_exp = "^\\s*((\\[?dspsrv[:\\s\\]]*)?((re|aw|fw)(\\[\\d+\\])*: *))+";


# Parse the text of an In-Reply-To header into a valid message ID.
sub parse_inreplyto {
  my ( $rep ) = @_;
  my $ref;

  # We could do smarter stuff here, like parsing "FOO's message of BAR"
  ( $ref ) = $rep =~ m|.*?(\<.*?\>)|;
  warn "$rep" if $rep =~ /message of/i && !defined($ref);

  $ref = msgid_clean( $ref ) if defined( $ref );

  $ref;
}

# Parse a References header into a list of message IDs.
sub parse_references {
  my ( $ref ) = @_;
  my @refs;

  for my $r ( split( /\s+/, $ref )) {
	push @refs, msgid_clean( $r );
  }

  # fixme validate

  @refs;
}

# Run a loop check on adding the second parameter as a child of the
# first, both Containers.
sub not_a_loop {
  my ( $a, $b ) = @_;
  my ( $rnext, @next );

  die "Wrong arg types" unless ref( $a ) eq "Container" and
	ref( $b ) eq "Container";

  return 0 if $a eq $b;

  @next = $b->child;
  return 1 if !( @next );

  # recurse!
  for my $next ( @next ) {
	return 1 if !defined( $next );
	return 0 unless not_a_loop( $a, $next );
  }

  return 1;
}

use lib "$ENV{HOME}/src/perl";
use MboxHack; # because Mail::Folder::Mbox blows goats
use MIME::Parser;
use Storable;
use Date::Format;
use diagnostics;

select( STDERR ); $| = 1; select( STDOUT );

# Iterate over supplied folders
while ( @ARGV ) {
  my $file = shift;;
  if ( !( $folder = new MboxHack $file )) {
	print STDERR "$file is not a mbox folder\n";
	next;
  }

  $folder->set_readonly();
  $count = $folder->qty();

  # Make folder directory
  my $htmldir = "$ {file}-html";
  -d $htmldir or mkdir $htmldir, 0755;

  # Load the id_table
  if ( -f "idtable" ) {
	print STDERR "Loading idtable...";
	$idref = retrieve( "idtable" );
	%id_table = %{$idref};
	print STDERR "done.\n";
  }

  # Iterate over messages in the folder
  print STDERR "Processing $count messages";
  for my $i ( 1 .. $count ) {
	# For each message:
	my $message = $folder->get_message( $i );
	print STDERR ".";

	# Clean up the message here
	$message = clean_message( $message );

	# HTMLify the message, preserving any navigation stuff that's
	# already there.
	my @current_html;
	if ( open( HTML, "<$ {htmldir}/$ {i}.html" )) {
	  @current_html = grep /^\<\!-- nav /, <HTML>;
	  close( HTML );
	}
	open( HTML, ">$ {htmldir}/$ {i}.html" );
	print HTML join( "", @current_html ) if @current_html;
	print HTML message_to_html( $message, $file, $i, $count );
	close( HTML );

	# Get required head bits
	my $head = $message->head();
	my $author = $head->get( "From" );
	my $subj = $head->get( "Subject" ) || "";
	my $msgid = $head->get( "Message-ID" ) || "";
	my $date = $head->get( "Date" );
	my $refs = $head->get( "References" ) || "";
	my $inrep = $head->get( "In-Reply-To" ) || "";

	# ARGH.
	chomp( $author );
	chomp( $subj );
	$msgid = msgid_clean( $msgid );
	chomp( $date );
	chomp( $refs );
	chomp( $inrep );

	my @refs = parse_references( $refs );

	if (!@refs && $inrep ) {
	  my $r = parse_inreplyto( $inrep );
	  defined( $r ) and push @refs, $r;
	}

	# 1 A. Check if it's in the table; if it is, stash the message, if
	# not, make an empty container and stash the message.
	my $cont = $id_table{ $msgid } || new Container;
	my $msg = new Message( $subj, $msgid, $folder->get_message_hash( $i ));
	$msg->author( $author );
	$msg->date( $date );
	$cont->message( $msg );
	$id_table{ $msgid } = $cont;

	# No references? Skip to the next message.
	if ( !@refs  ) {
	  next;
	}

	# 1 B. For each reference:
	for my $i ( 0..$#refs ) {
	  my $ref = $refs[ $i ];

	  # find a container
	  my $rcont = $id_table{ $ref };

	  # if there isn't one, make one with a null message.
	  if ( !defined( $rcont )) {
		$rcont = new Container( new Message( undef, $ref, undef ));
		$id_table{ $ref } = $rcont;
	  }

	  # 1 B 2 Link the messages together in the order implied by the
	  # References header, except
	  #   1. don't break existing links and
	  #   2. don't make loops.
	  if ( $i > 0 ) { # can't link zeroth element to anything above it
		my $parent_ref = $refs[ $i - 1 ];
		my $parent = $id_table{ $parent_ref };

		# Do we have a parent?
		if ( !$rcont->parent ) {
		  # loop check
		  if ( not_a_loop( $parent, $rcont )) {
			$rcont->parent( $parent );
			$parent->child( $rcont );
		  }
		}
	  }
	}

	# 1 C
	# Set parent of this message = $refs[ -1 ];
	if ( $cont->parent ) {
	  my $old = $cont->parent;
	  if ( defined( $old )) {
		$old->delete_child( $cont );
	  }
	}

	my $parent = $id_table{ $refs[ -1 ]}; # Has to exist!
	if ( defined( $parent )) {
	  if ( not_a_loop( $cont, $parent )) {
		$cont->parent( $parent );
		$parent->child( $cont );
	  }
	}
  } # end of message iteration loop, i.e. we're done reading the mailbox.

  # Safe (and prudent) to close the folder now.
  $folder->close();
  print STDERR "done.\n";

  print STDERR "Saving idtable...";
  store \%id_table, "idtable";
  print STDERR "done.\n";

  # 2 Find root set, i.e. all messages with no parents
  for my $m ( keys %id_table ) {
	if ( $id_table{ $m }->parent) {
	} else {
	  push @roots, $id_table{$m};
	}
	# 3 Discard id_table
	# delete $id_table{ $m } unless @ARGV;
	# let's not. let's keep it for message output below.
  }

  # 4 prune empties
  print STDERR "Pruning empties...";
  @roots = map { prune_empties( $_ ) } @roots;
  @roots = grep { defined( $_ )} @roots;
  print STDERR "done.\n";

  # 5 group root set by subject
  print STDERR "Grouping roots by subject...";

  # 5 A new hashtable
  my %subject_table;
  for my $cont ( @roots ) {
	# 5 B Find subject of this subtree
	my $subject;
	if ( defined( $cont->message )) {
	  $subject = $cont->message->subject || "";
	} else {
	  $subject = ($cont->child)[0]->message->subject;
	}

	# Strip out rubbish
	$subject = subject_clean( $subject );

	$subject or next; # give up if subject is now effectively useless.

	if ( !defined( $subject_table{ $subject })) {
	  $subject_table{ $subject } = $cont;
	} else {
	  my $oldm = $subject_table{ $subject }->message;
	  my $newm = $cont->message;

	  # empty supersedes non-empty
	  if ( defined( $oldm ) && !defined( $newm )) {
		$subject_table{ $subject } = $cont;
	  } else {
		if ( defined( $newm )) {
		  # non-RE: supersedes RE:
		  if ( $oldm->subject =~ /$re_exp/i && $newm !~ /$re_exp/i ) {
			$subject_table{ $subject } = $cont;
		  }
		}
	  }
	}
  }
  print STDERR "done.\n";

  my @old_roots = map { defined( $_ )?$_:() } @roots;
  @roots = ();

  # 5 C
  # Gather together the difference (what?)
  print STDERR "Threading remainder...";
  for my $cont ( @old_roots ) {

	# Find subject of this subtree.
	my $subject;
	if ( defined( $cont->message )) {
	  $subject = $cont->message->subject || "";
	} else {
	  $subject = ($cont->child)[0]->message->subject; # whee!
	}

	# Strip out rubbish
	$subject = subject_clean( $subject );

	# give up loop if subject is now effectively useless, but save it
	# as a root.
	if ( !$subject ) {
	  push @roots, $cont;
	  next;
	}

	# skip it if the hash is this container or a null container
	my $orig = $subject_table{ $subject };
	if ( $orig eq $cont ) {
	  push @roots, $cont unless grep { $_ eq $orig } @roots;
	  next;
	}

	if ( !defined( $orig->message )) {
	  push @roots, $orig unless grep { $_ eq $orig } @roots;
	  next;
	}

	# Now all hell breaks loose.

	# If they're both empties, merge the kids.
	if ( !defined( $orig->message ) && ( !defined( $cont->message ))) {
	  for my $c ( $cont->child ) {
		$orig->child( $c );
		# $cont->delete_child( $c ); no need, we're discarding it anyway.
	  }

	  # Stack it if it's not already in the list.
	  push @roots, $orig unless grep { $_ eq $orig } @roots;
	  $cont = undef; # trash the original
	  next;
	}

	# If one's empty and the other's not, make the non-empty be a
	# child of the empty.
	# If one is non-empty, non-re, and this one is not empty and
	# re, make the re a child of the non-re.
	if (( !defined( $orig->message ) && defined( $cont->message )) or
		( defined( $orig->message ) && defined( $cont->message ) &&
		  $orig->message->subject !~ /$re_exp/i &&
		  $cont->message->subject =~ /$re_exp/i )){
	  # original is empty, current is not.
	  $orig->child( $cont );
	  $cont->parent( $orig );
	  @roots = map { $_ eq $cont ? () : $_ } @roots;
	  push @roots, $orig unless grep { $_ eq $orig } @roots;
	  next;
	}

	if (( defined( $orig->message ) && !defined( $cont->message )) or
		( defined( $orig->message ) && defined( $cont->message ) &&
		  ( $cont->message->subject !~ /$re_exp/i )  &&
		  ( $orig->message->subject =~ /$re_exp/i ))) {
	  # original is not empty, current is.
	  $cont->child( $orig );
	  $orig->parent( $cont );
	  @roots = map { $_ eq $orig ? () : $_ } @roots;
	  push @roots, $cont unless grep { $_ eq $cont } @roots;
	  next;
	}

	# If all else fails, make a new empty and make both of these
	# siblings, but only if these are both Re: messages.
	# The latter prevents false trees from being created.
	if ( defined( $orig->message ) && defined( $cont->message ) &&
	   ( $cont->message->subject !~ /$re_exp/i ) &&
	   ( $orig->message->subject !~ /$re_exp/i )) {
	  push @roots, $cont;
	} else {
	  my $ncont = new Container;
	  $ncont->child( $orig );
	  $orig->parent( $ncont );
	  $ncont->child( $cont );
	  $cont->parent( $ncont );
	  $subject_table{ $subject } = $ncont;

	  # Clean up roots list
	  @roots = map { $_ eq $orig ? () : $_ } @roots;
	  push @roots, $ncont;
	}
  }
  print STDERR "done.\n";

  # 6 we could now discard the parent links, really

  # 7 order the siblings by date (or whatever)
}

# Dump out threading information
print STDERR "Dumping messages...";
print "<ul>\n";
@roots = sort container_date_sort @roots;
for my $cont ( 0..$#roots ) {
  print "<li>";
  dump_message( $roots[ $cont ], 0 ) if defined( $cont );
  print "</li>\n";
}
print "</ul>\n";

print STDERR "done.\n";

# Sort containers by date
sub container_date_sort {
  my ( $c1, $c2 ) = ( $a, $b );
  my $d1 = defined( $c1->date ) ? $c1->date : 0;
  my $d2 = defined( $c2->date ) ? $c2->date : 0;
  $d1 <=> $d2;
}

# DIAGNOSTIC, KINDA
sub dump_message {
  my ( $container, $i ) = @_;
  my $message = $container->message;
  my $subject = "[no subject]";
  my $author = "[no author]";
  my $date = "[no date]";
  my $msgid = "[no message]";
  if ( defined( $message )) {
	$subject = $message->subject;
	$subject ||= "[no subject]";
	$msgid = $message->message_id;
	$msgid ||= "[no msgid]";
	$date = $message->date;
	$date ||= 0;
	$author = $message->author;
	$author ||= "[no author]";
	$author =~ s/\</\&lt;/;
	if ( !$message->message ) {
	  $subject = "[EMPTY MESSAGE]";
	}
  }

  my $m;
  if ( $msgid eq "[no message]" ) {
	print "[PARENT NOT FOUND]"
  } else {
	if ( defined( $msgid ) && ( $m = find_ref( $msgid ))) {
	  $m =~ s|/|-html/|;
	  $m.= ".html";
	  print qq(<b><a href="$m">$subject</a></b>);
	} else {
	  print "<b>$subject</b>";
	}
	print qq( <em>$author</em> );
	print time2str( "%b %d", $date );
  }

  if ( $container->child ) {
	print "<ul>\n";
	for my $c ( sort container_date_sort $container->child ) {
	  if ( !defined( $c )) {
		die "XXX undefined child\n"; # Can't happen!
	  } else {
		print "<li>\n";
		dump_message( $c, $i + 1);
		print "</li>\n";
	  }
	}
	print "</ul>\n";
  }
}

# Clean up a message id
sub msgid_clean {
  my $msgid = shift;
  chomp( $msgid );
  $msgid =~ s/[\<\>]//g;
  $msgid;
}

# Look up a message ID and see if we can link it.
sub find_ref {
  my $msgid = msgid_clean( $_[ 0 ] );

  if ( defined( $id_table{ $msgid })) {
	my $msg = $id_table{ $msgid }->message;
	if ( defined( $msg )) {
	  my $href = $msg->message;
	  if ( defined( $href )) {
		return $href->[0] . "/" . $href->[2];
	  }
	}
  }

  return undef;
}

# Return a possible link for a message ID.
sub maybe_link_msgid {
  my $l = shift;
  my $li = find_ref( $l );
  if ( defined( $li )) {
	$li =~ s|/|-html/|;
	"<a href='../$ {li}.html'>&lt;$l&gt;</a>";
  } else {
	"&lt;$l;&gt;";
  }
}

# Convert a message to HTML.
sub message_to_html {
  my ( $message, $file, $i, $count, $nottop ) = @_;
  my $parser = new MIME::Parser;
  my $output_dir = "$ {file}-html/$ {i}";
  my $ret = "";
  $nottop ||= 0;

  # Stock goop.
  if ( !$nottop ) {
	$ret .= "<html><head><title>$file, message $i</title></head>\n";
	$ret .= "<body>\n";
  }

  # Clean up output dir before we parse
  if ( -d $output_dir && !$nottop ) {
	if ( opendir( DIR, $output_dir )) {
	  my @files = grep !/^\.\.?$/, readdir( DIR );
	  closedir( DIR );
	  for my $f ( @files ) {
		unlink "$output_dir/$f";
	  }
	}
  } else {
	mkdir $output_dir, 0755;
  }

  $parser->output_dir( $output_dir );
  $parser->output_to_core( 0 );


  my @lines = ( @{$message->header}, "\n", @{$message->body});
  my $entity;

  if ( $entity = $parser->parse_data( \@lines )) {
	if ( !$entity->is_multipart ) {
	  $entity->make_multipart; # force mail to always be multipart. easier.
	}

	# Dump out headers
	my $head = $message->head;
	$ret .= "<table cellspacing=\"0\" cellpadding=\"0\" border=\"0\">\n";
#	for my $h ( qw( Date From Subject Message-Id References In-Reply-To )) {
	for my $h ( $head->tags ) {
	  # Skip some headers that we couldn't throw away
	  next if ( $h =~ /^(Content-Type|Content-Transfer-Encoding|Content-Length|MIME-Version|From )$/i );
	  my $text = $head->get( $h );
	  $text ||= "";
	  chomp( $text ); # lose lose lose
	  if ( $h =~ /^(Message-Id|References|In-Reply-To)$/ ) {
		$text =~ s|\<([^\n>]+)\>|maybe_link_msgid( $1 )|eig;
	  } else {
		$text =~ s/\</&lt;/g; # unhappiness. uri_escape ain't what I want.
	  }
	  $text =~ s/[\r\n]/<br>\n\&nbsp; \&nbsp; \&nbsp;/g; # wrap a la Mail

	  $ret .=
		qq(<tr valign="top"><th align="right">$h:</th><td>&nbsp;</td><td>$text</td></tr>\n)
		if $text;
	}
	$ret .= "</table>\n<br>\n";

	# Now deal with the body.
	my $content_type = $entity->effective_type;
	$content_type ||= "text/plain";

	if ( $content_type eq "multipart/alternative" ) {
	  my @parts;
	  for my $p ( $entity->parts ) {
		if ( $p->effective_type eq "text/html" ) { # selecting HTML
		  push @parts, $p;
		} else {
		  if ( defined( $p->bodyhandle )) {
			unlink( $p->bodyhandle->path ); # discard text
		  }
		}
	  }
	  $entity->parts( \@parts );
	}

	for my $p ( $entity->parts ) {
	  my $ctype = lc( $p->effective_type );

	  # deal with bogosity
	  $ctype = "text/plain" if $ctype eq "text";

	  if ( $ctype =~ m|^text/| ) {
		my $fh = $p->bodyhandle->open( "r" );
		my $line;
		if ( $ctype eq "text/plain" ) {
		  $ret .= "<pre>\n";
		}

		# have to use this interface to get the mimedecoded version
		while ( defined( $line = $fh->getline )) {
		  if ( $ctype eq "text/plain" ) {
			$line =~ s/\</\&lt;/g; # this is all you need?
			#$line =~ s/\>/\&gt;/g;
			#$line =~ s/\&/\&amp;/g;
		  }
		  $ret .= $line;
		}

		# close the file, thanks.
		$fh->close;

		if ( $ctype eq "text/plain" ) {
		  $ret .= "</pre>\n";
		}

		# now discard the inhaled bodypart
		unlink( $p->bodyhandle->path );

	  } else {
		# not a text type.
		my $b = $p->bodyhandle;
		if ( defined( $b )) {
		  my $href = $b->path;
		  $href =~ s|^$file-html/||;
		  my $desc = "";
		  my $parthead = $p->head;
		  if ( defined( $parthead )) {
			$desc = $parthead->get( "Content-Description" );
		  }
		  $desc ||= $href;

		  # If it's an image, inline it.
		  if ( $ctype =~ m|^image/| ) {
			$ret .= qq(<img src="$href">);
		  } else {
			$ret .= qq(<a href="$href">$desc</a> [$ctype]);
		  }

		} else {
		  if (( $ctype eq "message/rfc822" ) ||
			  ( $ctype eq "multipart/alternative" )) {
			$ret .= "<blockquote>\n";

			# sillywalking
			my @msg = map { $_ .= "\n" } split( /\n/, $p->stringify_body );

			$ret .= message_to_html( new Mail::Internet( \@msg ),
									 $file, $i, $count, 1 );
			$ret .= "</blockquote>\n";

		  } else {
			die "<p>Part: " . $ctype . "</p>\n";
		  }
		}
	  }
	  rmdir( $output_dir ); # may fail, may not.
	}
  } else {
	# PARSE FAILED
	die "PARSE FAILED: $!\n";
  }

  # Trailer
  if ( !$nottop ) {
	$ret .= "</body>\n";
  }

  $ret;
}

# Attempt to figure out the REAL from address, discard junk headers, etc.
sub clean_message {
  my ( $message ) = shift;
  my $head = $message->head;
  my $body = $message->body;

  $head->replace( "To", "The DSPsrv" );

  # Successive versions of DSPsrv
  my $from = $head->get( "X-Orig-From" );
  $from ||= $head->get( "X-Original-From" );
  $from ||= $head->get( "From" ); # Last resort!
  $head->replace( "From", $from );

  # Delete senders; we're not getting anything useful there.
  $head->delete( "Sender" );

  # Other things we don't need.
  $head->delete( "Content-MD5" );
  $head->delete( "Content-MD5-Origin" );
  $head->delete( "Precedence" );
  $head->delete( "Priority" );
  $head->delete( "Received" );
  $head->delete( "Reply-To" );
  $head->delete( "Return-Path" );
  $head->delete( "Status" );
  $head->delete( "X-Accept-Language" );
  $head->delete( "X-Authentication-Warning" );
  $head->delete( "X-Brought-To-You-By" );
  $head->delete( "X-Charset" ); # could use?
  $head->delete( "X-Cc" );
  $head->delete( "X-Envelope-To" );
  $head->delete( "X-Filtered-By" );
  $head->delete( "X-Mailer" );
  $head->delete( "X-Msmail-Conversation-Id" );
  $head->delete( "X-Msmail-Fixed-Font" );
  $head->delete( "X-Msmail-Message-Id" );
  $head->delete( "X-No-Archive" );
  $head->delete( "X-Orig-From" );
  $head->delete( "X-Original-From" );
  $head->delete( "X-Orig-To" );
  $head->delete( "X-Pop3-RCPT" );
  $head->delete( "X-Priority" );
  $head->delete( "X-Sender" );
  $head->delete( "X-Sponsored-By" );
  $head->delete( "X-Uidl" );
  $head->delete( "X-VM-v5-data" );

  new Mail::Internet( Header=>$head, Body=>$body );
}

sub subject_clean {
  my $subject = shift;

  # BIG hackety: convert "Re: foo (was Re: bar)" to "bar"
  #$subject =~ s/^.*\(was re:\s*(.*)\)$/$1/i;
  # Hmm. Not sure about using this.

  # hackety
  $subject =~ s/\[?dspsrv[:\s\]]*//ig;

  # Remove RE: etc.
  $subject =~ s/$re_exp//i;
  $subject =~ s/\s?\(fwd\)$//i;

  # Do these last, in case the other ops leave trailing or leading space.
  $subject =~ s/^\s+//;
  $subject =~ s/\s+$//;

  $subject;
}

# STEP 4
sub prune_empties {
  my ( $container ) = @_;
  my $message = $container->message;
  my $empty = 1;
  my $parent = $container->parent;
  my @children;

  # 4 A - Empty with no children, nuke.
  # 4 B - Empty with children, promote unless it makes them root, unless
  #       there's only one child.

  # Check for emptiness
  if ( defined( $message ) && defined( $message->message )) {
	$empty = 0;
  }

  for my $c ( $container->child ) {
	if ( !defined( $c ) || !$container->is_child( $c )) {
	  next;
	} else {
	  if ( !prune_empties( $c )) {
		$container->delete_child( $c );
	  }
	}
  }

  @children = $container->child; # reload

  if ( $empty && (( $#children == 0 ) || defined( $parent ))) {
	for my $c ( @children ) {
	  if ( $parent ) {
		$parent->child( $c );
		$container->delete_child( $c );
	  } # hack to promote to root
	  $c->parent( $parent );
	}
  } else {
	$empty = 0; # leave a branch for the non-empties to hang off
  }

  @children = $container->child; # once more for the road

  if ( $empty ) {
	if ( $#children == 0 ) {
	  return $children[ 0 ]; # hack to promote to root.
	} else {
	  return undef;
	}
  } else {
	return $container;
  }
}
