From OSR
#!/usr/bin/perl
# $Id: bloglines2pdf,v 1.2 2007/01/08 04:36:26 hudson Exp $
#
# Turn a Bloglines feed into a PDF document
#
use warnings;
use strict;
use FileHandle;
use XML::RSS::Parser;
use HTML::Latex;
use Data::Dumper;
use LWP::UserAgent;
use Getopt::Long;
# ---------------------------------------------------------------------
# Command-line configuration.
#
# Declares the option variables, builds the usage text and parses
# @ARGV with Getopt::Long.  Dies with the usage text when the options
# are malformed or when the username or password is missing.
# ---------------------------------------------------------------------

# Not referenced anywhere in the visible script; kept for compatibility.
my $MAX_ITEMS = 20;

my $bloglines_user;
my $bloglines_passwd;
my $bloglines_sub  = 0;    # subscription number, 0 selects all
my $bloglines_mark = 0;    # incremented once per -m / --mark

# An explicit terminator is used here on purpose: an empty-string
# terminator ends at the first blank line, which silently swallows the
# following code whenever that blank line is lost.
my $usage = <<"END_OF_USAGE";
Usage: $0 [options]
-u | --username U Bloglines username (Required)
-p | --password P Bloglines password (Required)
-s | --sub N Subscription number (0 for all)
-m | --mark Mark as read
END_OF_USAGE

GetOptions(
    "u|username=s" => \$bloglines_user,
    "p|password=s" => \$bloglines_passwd,
    "s|sub=i"      => \$bloglines_sub,
    "m|mark+"      => \$bloglines_mark,
) or die $usage;

# Both credentials are mandatory.
die $usage
    unless defined $bloglines_user and defined $bloglines_passwd;
# ---------------------------------------------------------------------
# Fetch the unread items from Bloglines.
#
# Builds the getitems URL from the subscription number and the
# mark-as-read counter, authenticates with HTTP basic auth and leaves
# the response body in $html for the channel-splitting code below.
# Dies when the server answers with a 4xx/5xx status.
# ---------------------------------------------------------------------
my $url = sprintf
    "http://rpc.bloglines.com/getitems?s=%d&n=%d",
    $bloglines_sub,
    $bloglines_mark,
    ;

my $req = HTTP::Request->new( GET => $url );
$req->authorization_basic(
    $bloglines_user,
    $bloglines_passwd
);

my $ua = LWP::UserAgent->new(
    agent => 'rss2pdf',
);

my $response = $ua->request( $req );

# Fail loudly on client/server errors (bad credentials, outage, ...)
# instead of silently producing an empty document.  Non-error statuses
# are allowed through so a response without channels still yields a
# (possibly empty) document, as before.
die "Unable to fetch $url: " . $response->status_line . "\n"
    if $response->is_error;

# Only the body is wanted: as_string would prepend the status line and
# the HTTP headers to the XML.
my $html = $response->content;

# Keep a copy of the raw feed on disk.  Nothing in this script reads it
# back, so a failure to write it is reported but is not fatal.
my $xml_file = "/tmp/bloglines-feeds.xml";
if( open my $xml_fh, '>', $xml_file )
{
    print {$xml_fh} $html;
    close $xml_fh
        or warn "Unable to close $xml_file: $!\n";
}
else
{
    warn "Unable to open $xml_file: $!\n";
}
#
# Now that we have the RSS file from Bloglines, generate the LaTeX
# file and start writing the feed contents into it.
# One chapter per channel,
# One section per item
#
# ---------------------------------------------------------------------
# Open the LaTeX output file and write the document preamble.
#
# The LATEX handle stays open: the per-channel loop and the closing
# code further down print to it.  Dies if the file cannot be created.
# ---------------------------------------------------------------------
my $latex_file = "/tmp/bloglines-feeds.tex";
open LATEX, '>', $latex_file
    or die "Unable to open $latex_file: $!\n";

# The username is placed inside \title{}, where LaTeX special
# characters (an underscore in an e-mail style login, for example)
# would abort the pdflatex run.  Escape them in a single pass so no
# replacement gets escaped a second time.
my %latex_special_for = (
    '\\' => '\\textbackslash{}',
    '{'  => '\\{',
    '}'  => '\\}',
    '$'  => '\\$',
    '&'  => '\\&',
    '#'  => '\\#',
    '_'  => '\\_',
    '%'  => '\\%',
    '^'  => '\\textasciicircum{}',
    '~'  => '\\textasciitilde{}',
);
( my $tex_user = $bloglines_user )
    =~ s/([\\\{\}\$\&\#_\%\^~])/$latex_special_for{$1}/g;

# Inside this interpolating heredoc "\\" yields one backslash, so the
# LaTeX line break in the title has to be written as "\\\\".
# NOTE(review): the username in \pdfinfo is left unescaped; LaTeX
# escapes would not be appropriate inside a PDF string -- confirm
# whether it needs its own sanitising.
print LATEX <<END_OF_TEX;
\\documentclass[12pt]{book}
\\usepackage[colorlinks]{hyperref}
\\usepackage{times}
\\usepackage[a6paper,vcentering,top=30pt,bottom=5pt,left=0pt,right=0pt]{geometry}
\\usepackage[Lenny]{fncychap}
\\begin{document}
% Enable 14-pt fonts (good size for Sony)
\\large
% Create the PDF meta data
\\pdfinfo {
/Title (Blogs for $bloglines_user)
/Author (rss2pdf)
}
\\title{Blogs for\\\\
$tex_user}
\\author{Generated by rss2pdf}
\\maketitle
END_OF_TEX
# ---------------------------------------------------------------------
# Set up the HTML-to-LaTeX converter and the RSS wrapper strings.
# ---------------------------------------------------------------------
my $lp = HTML::Latex->new;

# Turn off some tags
#$lp->ban_tag( 'emph' );
#$lp->ban_tag( 'div' );
$lp->ban_tag( 'img' );

# The RSS parsers do not handle multiple channels in one feed, so each
# <channel> is later wrapped in its own minimal RSS document built from
# the header and footer below.
#
# Explicit heredoc terminators are used because an empty-string
# terminator ends at the first blank line and breaks as soon as that
# blank line is lost.
my $rss_header = <<'END_OF_RSS_HEADER';
<?xml version="1.0"?>
<rss version="2.0"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:bloglines="http://www.bloglines.com/services/module"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<channel>
END_OF_RSS_HEADER

my $rss_footer = <<'END_OF_RSS_FOOTER';
</channel>
</rss>
END_OF_RSS_FOOTER
# ---------------------------------------------------------------------
# Convert every channel of the feed: one \chapter per channel and one
# \section per item, written to the already-open LATEX handle.
# Dies if a channel cannot be parsed.
# ---------------------------------------------------------------------

# Titles are plain text dropped into \chapter{} / \section{}; LaTeX
# special characters in them ("Q&A", "foo_bar", "100%") would abort
# pdflatex.  Escape them in one pass so nothing is escaped twice.
my %tex_escape_for = (
    '\\' => '\\textbackslash{}',
    '{'  => '\\{',
    '}'  => '\\}',
    '$'  => '\\$',
    '&'  => '\\&',
    '#'  => '\\#',
    '_'  => '\\_',
    '%'  => '\\%',
    '^'  => '\\textasciicircum{}',
    '~'  => '\\textasciitilde{}',
);
my $escape_tex = sub {
    my ($raw) = @_;
    ( my $escaped = $raw ) =~ s/([\\\{\}\$\&\#_\%\^~])/$tex_escape_for{$1}/g;
    return $escaped;
};

while( $html =~ m!<channel>(.*?)</channel>!msg )
{
    # Wrap the single channel in a complete RSS document for the parser.
    my $new_html = $rss_header . $1 . $rss_footer;

    my $p = XML::RSS::Parser->new;
    my $feed = $p->parse_string( $new_html )
        or die "Unable to parse feed from bloglines?\n";

    # A channel without a <title> must not crash the run.
    my $blog_node = $feed->query( '/channel/title' );
    my $blog  = $blog_node ? $blog_node->text_content : '';
    my $count = $feed->item_count;
    print STDERR "Blog='$blog' ($count entries)\n";

    # Channels whose title starts with "OSR -" are skipped.
    next if $blog =~ /^OSR -/;

    my $tex_blog = $escape_tex->( $blog );
    print LATEX <<"END_OF_CHAPTER";
\\chapter{$tex_blog ($count)}
END_OF_CHAPTER

    for my $it ( $feed->query( '//item' ) )
    {
        # Items may lack a <title> or a <description>; fall back to an
        # empty string instead of calling a method on undef.
        my $title_node = $it->query( 'title' );
        my $title = $title_node ? $title_node->text_content : '';
        print STDERR "\t$title\n";

        my $desc_node = $it->query( 'description' );
        my $it_html = $desc_node ? $desc_node->text_content : '';

        # Strip any bad bits (script elements, whatever their case)
        $it_html =~ s!<script.*?/script>!!msgi;

        my $text = $lp->parse_string( $it_html );

        # Strip any \\ lines. HTML::Latex adds them
        # for some reason
        $text =~ s/\\\\//g;

        # NOTE(review): as written this replaces "&" with "&", i.e. it
        # is a no-op; kept unchanged -- confirm what was intended.
        $text =~ s/\&/\&/g;

        $text =~ s/\\hline/\\hrule/g;

        my $tex_title = $escape_tex->( $title );
        print LATEX <<"END_OF_SECTION";
\\section{$tex_title}
$text
END_OF_SECTION
    }
}
# ---------------------------------------------------------------------
# Finish the LaTeX document and build the PDF.
# ---------------------------------------------------------------------
print LATEX <<"END_OF_DOCUMENT";
\\end{document}
END_OF_DOCUMENT

# Buffered write errors only surface at close, so check it.
close LATEX
    or die "Unable to close $latex_file: $!\n";

#
# Run pdflatex twice to ensure that the references and TOC are
# correctly built.  A failing pass is reported but does not stop the
# second pass, matching the original best-effort behaviour.
#
for my $pass ( 1 .. 2 )
{
    system( pdflatex => $latex_file ) == 0
        or warn "pdflatex pass $pass did not exit cleanly (status $?)\n";
}
__END__