# From OSR
#!/usr/bin/perl
# $Id: rss2latex,v 1.2 2007/01/07 14:23:33 hudson Exp $
#
# Turn an RSS feed into a PDF document
use warnings;
use strict;
use FileHandle;
use XML::RSS::Parser;
use HTML::Latex;
use Data::Dumper;
# Main script body.
#
# Reads feed URLs, one per line, from the files named on the command
# line (or from STDIN), writes one LaTeX chapter per feed and one section
# per item into $latex_file, and finally runs pdflatex on that file.

# Upper bound on the number of items typeset per feed.
my $MAX_ITEMS = 20;

# Intermediate LaTeX document handed to pdflatex at the end.
my $latex_file = "/tmp/rss-feeds.tex";

# Three-argument open with a lexical handle: the mode can never be
# influenced by the file name and the handle is not a package global.
open my $latex_fh, '>', $latex_file
    or die "Unable to open $latex_file: $!\n";

# localtime in scalar context yields a human-readable timestamp string.
my $date = localtime;

my $p  = XML::RSS::Parser->new;
my $lp = HTML::Latex->new;

# Turn off some tags
$lp->ban_tag($_) for qw( emph div img );

# Document preamble.  The heredoc interpolates, so every LaTeX backslash
# is written doubled.
print {$latex_fh} <<"END_OF_PREAMBLE";
\\documentclass[12pt]{book}
\\usepackage[colorlinks]{hyperref}
\\usepackage{times}
\\usepackage[a6paper,vcentering,top=30pt,bottom=5pt,left=0pt,right=0pt]{geometry}
\\usepackage[Lenny]{fncychap}
\\begin{document}
% Enable 14-pt fonts (good size for Sony)
\\large
\\title{Blogs for $date}
% Create the PDF meta data
\\pdfinfo {
/Title (Blogs for $date)
/Author (rss2pdf)
}
END_OF_PREAMBLE

FEED:
while ( my $url = <> ) {
    chomp $url;

    # A blank input line is not a feed; skip it instead of trying to
    # fetch an empty URL.
    next FEED if $url =~ /^\s*$/;

    my $feed = $p->parse_uri($url);
    if ( !$feed ) {
        warn "Unable to parse URL $url\n";
        next FEED;
    }

    # Assign the query result to a scalar first so that the call is made
    # in scalar context and yields a single node (or undef).
    my $channel_title = $feed->query('/channel/title');
    my $title = _node_to_latex( $lp, $channel_title, 'Untitled feed' );

    my $count = $feed->item_count;
    print STDERR "$title ($count items)\n";

    print {$latex_fh} <<"END_OF_CHAPTER";
\\chapter{$title}
END_OF_CHAPTER

    # Only print up to the maximum of $MAX_ITEMS entries.  The counter is
    # post-incremented, so the test has to be '>=' for exactly
    # $MAX_ITEMS items to get through.
    my $section = 0;

    ITEM:
    for my $item ( $feed->query('//item') ) {
        last ITEM if $section++ >= $MAX_ITEMS;

        my $title_node       = $item->query('title');
        my $description_node = $item->query('description');

        # An item may lack either element; substitute a default rather
        # than calling a method on undef and aborting the whole run.
        my $text       = _node_to_latex( $lp, $description_node, '' );
        my $node_title = _node_to_latex( $lp, $title_node, '(untitled)' );

        # Strip any \\ lines.  HTML::Latex adds them
        # for some reason
        $text =~ s/\\\\//g;

        # NOTE(review): as written this replaces '&' with '&' and so
        # changes nothing.  The pattern may originally have matched an
        # HTML entity such as '&amp;' -- confirm against the upstream
        # script before changing it.
        $text =~ s/\&/\&/g;

        print {$latex_fh} <<"END_OF_SECTION";
\\section{$node_title}
$text
END_OF_SECTION
    }
}

print {$latex_fh} <<"END_OF_DOCUMENT";
\\end{document}
END_OF_DOCUMENT

# Buffered write errors only surface at close time, so check it.
close $latex_fh
    or die "Unable to write $latex_file: $!\n";

# pdflatex is run twice, presumably so that references and hyperref
# bookmarks written during the first pass are picked up by the second.
# Both passes are always attempted; any failure is reported and turned
# into a non-zero exit status.
my $pdflatex_failed = 0;
for my $pass ( 1, 2 ) {
    next if system( pdflatex => $latex_file ) == 0;

    my $reason = $? == -1
        ? "could not be run: $!"
        : "returned wait status $?";
    warn "pdflatex pass $pass on $latex_file $reason\n";
    $pdflatex_failed = 1;
}
exit 1 if $pdflatex_failed;

# Convert the text content of an RSS node from HTML to LaTeX.
#
#   $converter  object providing parse_string() (the HTML::Latex instance)
#   $node       node providing text_content(), or undef if the element
#               was not present
#   $default    string returned unchanged when $node is undef
#
# Returns whatever $converter->parse_string() returns for the node text.
sub _node_to_latex {
    my ( $converter, $node, $default ) = @_;

    return $default unless defined $node;
    return $converter->parse_string( $node->text_content );
}
__END__