#!/usr/bin/perl
#
# compile some RSS feeds into a single page
#
use CGI qw( fatalsToBrowser -debug );
use XML::RSS;
use Date::Parse;
use POSIX;
use Data::Dumper;
use Digest::MD5;
use URI::Escape;
use Storable;
use DBI;
use DBD::mysql;

my $query = new CGI;
print $query->redirect( 'http://www.waider.ie/cgi-bin/sqlrss.pl' );
exit;
my $splitscreen = $query->param( "splitscreen" );
$splitscreen ||= 0;
my $cachedir = $ENV{"BASEDIR"};
$cachedir ||= "/var/tmp/rss-cache-" . $ENV{"LOGNAME"};

# Q&D config file
open( C, "/var/tmp/rss.conf" ) or die "conf: $!";
my @conf;
while (<C>) {
    chomp;
    push @conf, $_;
}
close( C );

my ( $host, $database, $username, $password ) = @conf;
my $dbh = DBI->connect( "DBI:mysql:host=$host:database=$database", $username, $password )
  or die $DBI::errstr;

# Generate a session key
my $id = $query->cookie('rssid');
my $seenbefore;
if ( !defined( $id ) or !$id ) {
    my $md5 = new Digest::MD5;
    my $remote = $ENV{REMOTE_ADDR} . $ENV{REMOTE_PORT};
    $id = $md5->md5_base64( time, $$, $remote );
    $id =~ tr|+/=|-_.|;  # Make non-word chars URL-friendly
    $seenbefore= "";
} else {
    $seenbefore = "Welcome back! ";
}

my $seen;
my %thisrun;
eval {
    -d "$cachedir" or mkdir "$cachedir", 0755;
    -d "$cachedir/profiles" or mkdir "$cachedir/profiles", 0755;
    $seen = retrieve( "$cachedir/profiles/$id" );
};

if ( !defined( $seen )) {
    $seen = {};
}

my $debug = $ENV{'DEBUG'} || 0;
my @feeds = $query->param( "feeds" );
my $cmd = $query->param( "cmd" );
my $dump = $query->param( "dump" );
my %rss;

$| = 1;

$cmd ||= "";

my %entries;

if ( $query->param( "zapem" )) {
    $cmd = "zap";
}

if (( !@feeds and !$cmd ) or ( $cmd eq "list" ) or ( $cmd eq "zap" )
    or ( $cmd eq "zapafter" )) {
    my $feeds = $dbh->selectall_arrayref( 'SELECT name FROM feeds' )
      or die $DBI::errstr;

    for my $f ( @{$feeds} ) {
        push @feeds, $f->[0];
    }
} else {
    print STDERR "something's gotten hold of my arse\n";
}

#map { $_ = $basedir . "/$_" } @feeds;

if ( $query->param( "opml" )) {
    print $query->header( -Content_Type => "text/plain" );
    print "<?xml version=\"1.0\"?>\n";
    print "<?opml version=\"1.1\"?>";
    print "<head><title>my subscriptions</title></head><body>";
    for my $feed ( @feeds ) {
        next if $feed =~ /\..*$/;
        my $rss = new XML::RSS;
        eval {
            next unless $rss->parsefile( $feed );
        };
        if ( $@ ) {
            next;
        }

        my $channel = $rss->{channel};

        printf( "<outline version=\"RSS\" description=\"%s\" language=\"%s\" title=\"%s\" xmlURL=\"%s\" text=\"%s\" type=\"%s\" htmlUrl=\"%s\"></outline>",
                ( $channel->{description}||""),
                "en",
                ( $channel->{title}||""),
                "",
                ( $channel->{title}||""),
                "",
                ( $channel->{link}||""));
    }
    print "</body></opml>";
}

my $cookie = $query->cookie( -name => 'rssid',
                             -value => $id,
                             -path => '/',
                             -expires => '+1y' );

print $query->header( -charset => 'utf-8', -cookie => $cookie );

if ( $splitscreen eq "frameset" ) {
    print "<frameset rows=\"50%,50%\">\n";
    print "<frame src=\"rss.pl?splitscreen=index\">\n";
    print "<frame src=\"rss.pl?splitscreen=content\" name=\"content\">\n";
    print "</frameset>";
    exit;
} elsif ( $splitscreen eq "content" ) {
    my $article = $query->param( "article" );
    print "ARTICLE BITS";
    exit;
}

my $target = $splitscreen ? "target=\"content\" " : "";
my $form = "<form>";
if ( $cmd ne "list" ) {
    $form = "<form method=\"post\">";
}
my $hdate = strftime( "%B %d at %H:%M:%S", localtime( time ));
my $rcsid = '$Id: rss.pl,v 1.22 2005/09/04 11:18:14 waider Exp $';
print <<"EOF";
<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">
<html>
  <head>
    <link rel="stylesheet" href="/~waider/waider.css">
    <title>RSS Reader</title>

    <script>
      function expandCollapse() {
        for (var i=0; i < expandCollapse.arguments.length; i++) {
          var element = document.getElementById(expandCollapse.arguments[i]);
          element.style.display = (element.style.display == "none") ? "block" : "none";
        }
      }
    </script>

    <!-- $rcsid -->
  </head>
  <body>
<h2>$ {seenbefore}This page generated on $hdate.</h2>
$form
<dl style="border: 1px; border-color: white; border-style: solid; padding: 3px">
EOF

if ( $cmd ) {
    if ( $cmd eq "list" ) {
        if ( !@feeds ) {
            print "<dt>Either no feeds, or I can't load 'em!</dt></dl>";
            exit;
        }
        print "<dt>Feeds List</dt><dt><hr></dt>";
    } elsif ( $cmd eq "zap" or $cmd eq "zapafter" ) {
        my $e = $query->param( "article" );
        if ( defined( $e )) {
            $e = uri_unescape( $e );
            $seen->{$e} = "zap " . ( $seen->{$e}||"" );
            #print "<dt>Zapped $e</dt><dt><hr></dt>" if $debug;
        } else {
            for my $e ( $query->param()) {
                if ( $e =~ /^zap_(.*)$/ ) {
                    $e = uri_unescape( $1 );
                    $seen->{$e} = "zap " . ( $seen->{$e} || "" );
                    print "<dt>Zapped $e</dt>";
                } else {
                    # debug
                    #print "<dt>Skipping $e</dt>";
                }
            }
        }
    } else {
        $cmd = quotemeta( $cmd );
        print "<dt>Unknown command $cmd</dt></dl>";
        exit;
    }
}

while ( @feeds ) {
    my $file = shift @feeds;
    next if $file =~ /\..*$/;   # for my own special abuse

    my $feeddata =
      $dbh->selectall_arrayref( 'SELECT feedid, name, url, ' .
                                'UNIX_TIMESTAMP(lastupdate), rawfeed, ' .
                                'etag, modified, cleanfeed ' .
                                'FROM feeds WHERE name=?', undef,
                                $file );
    next unless $feeddata and @{$feeddata};
    my %thisfeed =
      (
       feedid     => $feeddata->[0]->[0],
       name       => $feeddata->[0]->[1],
       url        => $feeddata->[0]->[2],
       lastupdate => $feeddata->[0]->[3],
       rawfeed    => $feeddata->[0]->[4],
       etag       => $feeddata->[0]->[5],
       modified   => $feeddata->[0]->[6],
       cleanfeed  => $feeddata->[0]->[7],
      );

    if ( !($thisfeed{cleanfeed}||'')) {
        print "<dt>$file is waiting for update</dt>\n";
        next;
    }

    my ( $feedfile ) = $file =~ m@.*/([^/]+)$@;

    my $rss = new XML::RSS;
    # these mofos have already been parsed by XML::RSS so they should be
    # somewhat compliant, dammit.
    eval {
        next unless $rss->parse( $thisfeed{cleanfeed} );
    };
    if ( $@ ) {
        print STDERR "Error parsing $file:\n";
        print STDERR "$@\n";
        if ( open( PARP, ">$cachedir/" . $thisfeed{name} . ".broke" )) {
            print PARP $thisfeed{cleanfeed};
            close( PARP );
        }
        next;
    } else {
        unlink(  $cachedir . "/" . $thisfeed{name} . ".broke" );
    }

    my $lastupated = $thisfeed{lastupate};

    my $channel = $rss->{channel};
    if ( $rss->{image} ) {
        $channel->{image} = $rss->{image};
    }
    next unless $channel;
    print STDERR "Working on [" . $channel->{title} . "]\n" if $debug;
    my $feed = $channel->{title};
    $rss{$feed} = $rss;

    my $date = "";
    if ( defined( $channel->{dc} ) and defined( $channel->{dc}->{date} )) {
		$date = $channel->{dc}->{date};
    }

    if ( defined( $channel->{pubDate} )) {
        $date ||= $channel->{pubDate};
    }

    if ( defined( $rss->{items}->[0]->{dc}) and defined( $rss->{items}->[0]->{dc}->{date})) {
        $date ||= $rss->{items}->[0]->{dc}->{date};
    }

    $date ||= scalar(gmtime($lastupdated));

    $date = stupid_rss_date( $date, $lastupdated );

    @items = @{$rss->{items}};

    if ( $cmd eq "list" ) {
        print "<dt><input type=\"checkbox\" name=\"feeds\" value=\"$feedfile\">$feed</dt>\n";
        print "<dd>Last updated: " . scalar( gmtime( $date )) . "<br>\n";
        print "Source URL: <a href=\"" . $channel->{link} . "\">" . $channel->{link} . "</a><br>\n" if $channel->{link};
        print "Description: " . $channel->{description} . "<br>\n" if $channel->{description};
        print "Items: " . scalar( @items ) . "<br>\n";
        print "<br>\n";
        print "</dd>\n";
        next;
    }

    if ( !@items ) {
        print STDERR "No items!\n";
        next;
    } else {
        print STDERR "  " . scalar( @items ) . " items\n" if $debug;
    }

    for my $item ( @items ) {
        my $itemdate = $lastupdated;
        my $trydate;

        if ( defined( $item->{dc} )) {
            if ( defined( $item->{dc}->{date})) {
                if ( defined( $item->{dc}->{date} )) {
                    $trydate = stupid_rss_date( $item->{dc}->{date} );
                    if ( !defined( $trydate ) or !$trydate ) {
                        print STDERR "   failed to parse date" .
                          $item->{dc}->{date} . "\n" if $debug;
                    } else {
                        $itemdate = $trydate;
                    }
                }
            }
            $item->{dc}->{creator} = ""
              if !defined( $item->{dc}->{creator});
        }

        my $index = 0;
        my $itemname = sprintf( "%d_%03d_%s", $itemdate, $index, $feed );
        while ( 1 ) {
            $itemname = sprintf( "%d_%03d_%s", $itemdate, $index, $feed );
            last if !defined( $entries{ $itemname });
            $index++;
        }

        $entries{$itemname} = $item;
        print STDERR "  Added $itemname to list\n" if $debug;
    }
}

my $oldday;
my $oldchannel = "";

my $idx = 1;
my $seenzap = 0;
for my $entry ( sort { $b <=> $a } keys %entries ) {
    my ( $date, $index, $feed ) = split( '_', $entry );
    my $item = $entries{$entry};
    my $rss = $rss{$feed};
    my $image = $rss->{channel}->{image};
    my $channel = $rss->{channel};
    my $title = $feed;

    # skip anything over a month old
    last if ( time - $date ) > 60 * 60 * 24 * 30;

    # skip anything we've marked as zapped.
    if ( defined( $seen->{$entry})) {
        if ( $cmd eq "zapafter" ) {
            my $e;
            if ( !$seenzap ) {
                my $e = $query->param( "article" );
                if ( defined( $e )) {
                    $e = uri_unescape( $e );
                    if ( $entry eq $e ) {
                        $seen->{$entry} = "zap $hdate";
                        $seenzap = 1;
                    }
                }
            } else {
                $seen->{$entry} = "zap $hdate";
            }
        }

        if ( $seen->{$entry} =~ /^zap/ ) {
            $seen->{$entry} = "zap $hdate";
            $thisrun{$entry} = $hdate;
            next;
        }
    }

    # visual break between days
    if ( defined( $oldday )) {
        if ( $oldday ne strftime( '%d', localtime( $date ))) {
            print <<"EOT";
</dl><dl style="border: 1px; border-color: white; border-style: solid; padding: 3px">
EOT
            print strftime( "<dt class=\"greyback\" style=\"text-align: center\">%a, %b %d %Y</dt>\n", localtime( $date ));
            print <<"EOT";
</dl><dl style="border: 1px; border-color: white; border-style: solid; padding: 3px">
EOT
            $oldchannel = "";
        }
    } else {
        print strftime( "<dt class=\"greyback\" style=\"text-align: center\">%a, %b %d %Y</dt>\n", localtime( $date ));
        print <<"EOT";
</dl><dl style="border: 1px; border-color: white; border-style: solid; padding: 3px">
EOT
    }
    $oldday = strftime( '%d', localtime( $date ));

    # the horror, the horror
    if ( $oldchannel and ( $oldchannel ne $channel )) {
        print <<"EOT";
</dl><dl style="border: 1px; border-color: white; border-style: solid; padding: 3px">
EOT
        print "<dt>";
        if ( $image and $image->{url}) {
            print "<img align=\"right\" src=\"" . $image->{url} . "\">";
        }
        print "<a ${target}href=\"" . $channel->{link} . "\">" if $channel->{link};
        print $channel->{title}||$title;
        print "</a>" if $channel->{link};
        print "</dt>";
    }

    if ( !$oldchannel ) {
        print "<dt>";
        if ( $image and $image->{url}) {
            print "<img align=\"right\" src=\"" . $image->{url} . "\">";
        }
        print "<a ${target}href=\"" . $channel->{link} . "\">" if $channel->{link};
        print $channel->{title}||$title;
        print "</a>" if $channel->{link};
        print "</dt>";
    }

    $oldchannel = $channel;
    my $byline = "";
    if ( $item->{dc}->{creator}) {
        $byline = " (" . $item->{dc}->{creator} . ")";
    }

    print "<dt>";
    print strftime( '%H:%M:%S', localtime( $date ));

    print " [";
    print "<input type=\"checkbox\" name=\"zap_" . uri_escape( $entry ) .
      "\">";
    print "<a href=\"/cgi-bin/rss.pl?cmd=zap&article="
      . uri_escape( $entry ) . "#$idx\">";
    print "zap";
    print "</a>";
    print "]";

    $item->{link} ||= $channel->{link}; # more horror
    print ": <a ${target}href=\"" . $item->{link} . "\" name=\"$idx\">" . ( $item->{title} || $channel->{description} ) . "</a> ";

    if ( defined( $seen->{$entry})) {
        print "[seen on " . $seen->{$entry} . "] ";
    } else {
        $seen->{$entry} = $hdate;
    }
    $thisrun{$entry} = $hdate;

    print "[";
    print "<a href=\"/cgi-bin/rss.pl?cmd=zapafter&article="
      . uri_escape( $entry ) . "#$idx\">";
    print "zap after";
    print "</a>";
    print "]";

    print "</dt>\n";
    if ( $item->{description} ) {
        print "<dd class=\"justified\">";
        # safety clown says...
        $item->{description} =~ s{<blink>}{}g;
        print $item->{description} . "$byline<br><br>";
        print "</dd>\n";
    } else {
        #print "<a ${target}href=\"" . $item->{link} . "\">[go to item]</a><br><br>\n";
    }
    $idx++;
}
print "<dt style=\"text-align: right\"><input type=\"submit\" value=\"Read checked feeds\"></dt>" if $cmd eq "list";
print "<dt style=\"text-align: right\"><input type=\"submit\" name=\"opml\" value=\"Export OPML\"></dt>" if $cmd eq "list";

print "<dt style=\"text-align: right\"><input type=\"submit\" name=\"zapem\" value=\"Zap checked items\"><br><a href=\"/cgi-bin/rss.pl?cmd=list\">Select Feeds</a></dt>" if $cmd ne "list";
print "</dl>\n";
print "</form>";
print "</body>\n</html>\n";

if ( $cmd ne "list" ) {
    for my $k ( keys %{$seen} ) {
#        delete $seen->{$k} if !defined( $thisrun{$k} );
#        delete $seen->{$k} if $seen->{$k} !~ /$hdate/;
    }
}
store( $seen, "$cachedir/profiles/$id" );

sub stupid_rss_date {
    my $date = shift;
    my $lastupdated = shift;

    $lastupdated ||= 0;

    # Blogger's variation: 2003-07-16 17:44:13Z
    $date =~ s/[T ](\d+:\d+)(:\d+)?Z$/T$1+00:00/; # Z = Zulu time = GMT

    # stupid rss date format: YYYY-MM-DDTHH:SS[+-]ZZZZ
    # ISO 8601, people...
    $date =~ s/(\d)T(\d)/$1 $2/;
    $date =~ s/(\d\d):(\d\d)$/$1$2/;

    my $unixtime = str2time( $date );

    # HURGH BLARGH HONK. More Blogger stupidity.
    if ( defined( $unixtime )) {
        if ( $lastupdated > $unixtime ) {
            $unixtime = $lastupdated;
        }

        $date = $unixtime;
    } else {
        $date = "(can't parse $date)";
    }

    $date;
}
