#!/usr/bin/perl -w
#
# Posts new BBC News front-page RSS items to Twitter.
#
# Flow:
#   1. Download and parse the RSS feed.
#   2. Read the pubDate checkpoint ("lastchecked") from the checkdate table.
#   3. For every item published after the checkpoint, oldest first, post
#      "<description> <tinyurl>" as a status update.
#   4. Store the pubDate of the newest item that was actually posted as the
#      new checkpoint.
#
# The script dies (non-zero exit) when the feed cannot be fetched, the
# database is unreachable or has no usable checkpoint, or a post is rejected.
# When a post is rejected, the checkpoint is still advanced past the items
# already posted in this run so they are not posted twice on the next run.

use strict;
use warnings;

use XML::RSS;
use LWP::UserAgent;
use DateTime;
use DBI;
use Date::Manip;
use WWW::Shorten::TinyURL;

my $RSSFeed = "http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/front_page/rss.xml";
my $posturl = "http://twitter.com/statuses/update.xml";

my $rss = XML::RSS->new;
my $ua  = LWP::UserAgent->new;

# not sure if necessary..
$ua->default_headers->push_header('Cache-Control' => "must-revalidate");

my $resp = $ua->get($RSSFeed);

# Without the feed body there is nothing to parse, so stop here instead of
# handing an undefined value to the parser.
$resp->is_success
    or die "Could not retrieve $RSSFeed: ", $resp->status_line, "\n";

# parse the RSS content
$rss->parse($resp->content);

# get the channel items, keeping only those that can be posted and dated
my @items = grep {
       defined $_->{'description'}
    && defined $_->{'link'}
    && pub_date_of($_)
} @{ $rss->{items} || [] };

# $dbh is undefined when connect() fails, so the error has to come from the
# package variable $DBI::errstr.
my $dbh = DBI->connect('DBI:mysql:twitternews:localhost', 'username', 'password')
    or die "Unable to connect to database: $DBI::errstr\n";

# From here on let DBI raise an exception on any database error.
$dbh->{RaiseError} = 1;

my $q_check = $dbh->prepare("select lastchecked from checkdate");
$q_check->execute();
my ($last_checked) = $q_check->fetchrow_array();
$q_check->finish();

my $last_date = defined $last_checked ? ParseDate($last_checked) : '';

# An empty checkpoint would make every feed item look new on every run.
die "No usable lastchecked value in checkdate table\n" unless $last_date;

# sort by pubDate, oldest first, so items are posted in publication order
my @orderedItems = sort sortByDateRev @items;

# The user agent and its credentials do not change between posts.
my $twitter = LWP::UserAgent->new;
$twitter->credentials(
    'twitter.com:80',
    'Twitter API',
    'username' => 'password',
);

my $last_posted_date;    # pubDate of the newest item posted in this run
my $post_error;          # set when a post is rejected

foreach my $item (@orderedItems) {
    my $pub_date = pub_date_of($item);

    # check if this is a new item
    next unless Date_Cmp($last_date, $pub_date) < 0;

    # post to twitter
    my $response = $twitter->post(
        $posturl,
        [
            status => build_status($item),
        ]
    );

    if (!$response->is_success) {
        # Stop posting, but fall through so the progress made so far is saved.
        $post_error = "Login failed: " . $response->status_line . "\n";
        last;
    }

    $last_posted_date = $pub_date;
}

# Store the pubDate of the most recent posted item rather than the current
# date/time: using the current time caused items to be missed when they were
# published after the publish date they claim.
# Only items newer than the old checkpoint are ever posted, so this write can
# only move the checkpoint forward; when nothing was posted it is left alone.
if (defined $last_posted_date) {
    # use MySql DATETIME format
    my $db_dt = UnixDate($last_posted_date, "%Y-%m-%d %H:%M:%S");
    $dbh->do('update checkdate set lastchecked=? where 1', undef, $db_dt);
}

$dbh->disconnect();

die $post_error if defined $post_error;

# Returns the parsed pubDate of a feed item, or an empty string when the item
# has no pubDate or Date::Manip cannot parse it.
sub pub_date_of {
    my ($item) = @_;

    return '' unless defined $item->{'pubDate'};

    my $parsed = ParseDate($item->{'pubDate'});
    return defined $parsed ? $parsed : '';
}

# Builds the status text for one feed item: the description (truncated when
# longer than 124 characters) followed by a shortened link.
sub build_status {
    my ($item) = @_;

    # sacrifice the title in order to add tinyurl to content
    my $text = $item->{'description'};

    # if necessary truncate description
    if (length($text) > 124) {
        # The original literal ended with a line break inside the quotes;
        # "\n" reproduces that same string explicitly.
        $text = substr($text, 0, 121) . ".. \n";
    } else {
        $text .= " ";
    }

    # add tinyurl link to content; fall back to the full link when the
    # shortener returns nothing
    my $short_link = makeashorterlink($item->{'link'});
    $short_link = $item->{'link'} unless defined $short_link;

    return $text . $short_link;
}

# Sort comparator: orders feed items by pubDate ascending (oldest first).
# Despite the name, this is not a descending sort.
sub sortByDateRev {
    my $adate = ParseDate($a->{pubDate});
    my $bdate = ParseDate($b->{pubDate});
    return Date_Cmp($adate, $bdate);
}