#!/usr/bin/perl -w
# Copyright (C) 2003 Sean Walberg
#
# Watches a list of URLs for changes and records each change as an item
# in an RSS feed.
#
# The config file holds one page per line: the URL, a tab, and the MD5
# hex digest of the content seen on the previous run (the digest may be
# missing for a newly added URL).  Every run fetches each URL, compares
# digests, adds a feed item for every page whose digest differs, and
# rewrites the config file with the fresh digests.
#
# Usage: checknewpages [config-file]
#   config-file defaults to $HOME/etc/checknewpages.cfg
#   the feed is written to $HOME/public_html/new.xml

use strict;
use XML::RSS;
use LWP;
use LWP::UserAgent;
use Digest::MD5 qw /md5_hex/;

# Upper bound on the number of items kept in the feed.
my $MAX_ITEMS = 15;

my $cfg = $ENV{'HOME'} . "/etc/checknewpages.cfg";
my $out = $ENV{'HOME'} . "/public_html/new.xml";
my $browser = LWP::UserAgent->new();

$cfg = $ARGV[0] if $ARGV[0];
(-f $cfg) or die "$cfg is no good";

# Create the rss object, and read in the existing file if it exists
my $rss;
if (-f $out) {
    $rss = XML::RSS->new;
    $rss->parsefile($out);
}
else {
    # not there, create
    $rss = XML::RSS->new(version => '0.91');
    $rss->channel(
        title       => "New pages",
        link        => "http://ertw.com/",
        description => "Changed page watchlist",
    );
}

open my $cfg_in, '<', $cfg or die "can't open $cfg: $!";

# Replacement config contents, built up as each URL is processed.
my $newconf = '';

while (my $line = <$cfg_in>) {
    chomp $line;
    my ($url, $hash) = split /\t/, $line;

    # Blank lines carry no URL; there is nothing to fetch or to keep.
    next unless defined $url && length $url;

    # Placeholder that can never equal a real digest, so a URL without
    # a stored hash is always reported on its first successful fetch.
    $hash = "XXX" unless $hash;

    my $content = do_GET($url);
    unless (defined $content) {
        # A failed fetch is not a page change: keep the previous hash
        # so the page is neither reported now nor re-reported merely
        # because it became reachable again.
        warn "could not fetch $url, keeping previous hash\n";
        $newconf .= "$url\t$hash\n";
        next;
    }

    my $newhash = md5_hex($content);
    unless ($hash eq $newhash) {    # changed
        # Make room so the feed never exceeds $MAX_ITEMS after the add;
        # a loop (rather than a single pop) also trims a feed that is
        # already over the limit.
        my $items = $rss->{'items'};
        while ($items && @{$items} >= $MAX_ITEMS) {
            pop @{$items};
        }

        # append the time to the url so aggregators see it as
        # different
        $rss->add_item(
            title => $url,
            link  => $url . "#" . time(),
            mode  => 'insert',
        );
    }
    $newconf .= "$url\t$newhash\n";
}
close $cfg_in;

open my $cfg_out, '>', $cfg or die "cant open $cfg: $!";
print {$cfg_out} $newconf;
# Buffered write errors only surface at close, so check it.
close $cfg_out or die "can't write $cfg: $!";

$rss->save($out);

# do_GET(URL, ...)
#
# Fetches URL with the shared LWP::UserAgent, identifying as a desktop
# browser.  All arguments are passed straight through to
# LWP::UserAgent::get.
#
# Returns:
#   list context   - (content, is_success flag, response object)
#   scalar context - the content on success, undef on failure
sub do_GET {
    $browser->agent("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)");
    my $resp = $browser->get(@_);
    return ($resp->content, $resp->is_success, $resp) if wantarray;
    return unless $resp->is_success;
    return $resp->content;
}