Warning: Parameter 1 to Language::getMagic() expected to be a reference, value given in /opt/local/apache2/htdocs/wiki/includes/StubObject.php on line 58
Bloglines2pdf - OSR

Bloglines2pdf

From OSR

Jump to: navigation, search
#!/usr/bin/perl
# $Id: bloglines2pdf,v 1.2 2007/01/08 04:36:26 hudson Exp $
#
# Turn a Bloglines feed into a PDF document
#
use warnings;
use strict;
use FileHandle;
use XML::RSS::Parser;
use HTML::Latex;
use Data::Dumper;
use LWP::UserAgent;
use Getopt::Long;

# Not referenced anywhere else in this script; presumably meant as a
# cap on the number of items per feed.
my $MAX_ITEMS = 20;

# Command-line settings.  The credentials start out undefined so that
# a missing one can be detected after parsing.
my ( $bloglines_user, $bloglines_passwd );
my ( $bloglines_sub, $bloglines_mark ) = ( 0, 0 );

# Help text, printed whenever the command line cannot be used.
my $usage = <<"END_OF_USAGE";
Usage: $0 [options]
        -u | --username U       Bloglines username (Required)
        -p | --password P       Bloglines password (Required)
        -s | --sub N            Subscription number (0 for all)
        -m | --mark             Mark as read
END_OF_USAGE

# Option specification: name/alias pattern followed by its destination.
my @option_spec = (
        "u|username=s", \$bloglines_user,
        "p|password=s", \$bloglines_passwd,
        "s|sub=i",      \$bloglines_sub,
        "m|mark+",      \$bloglines_mark,
);

# Unknown or malformed options: show the help text and stop.
unless( GetOptions( @option_spec ) )
{
        die $usage;
}

# Both credentials are mandatory.
if( !defined $bloglines_user || !defined $bloglines_passwd )
{
        die $usage;
}



#
# Build the request for the unread items.
#   s = subscription number (0 for all, per the usage text)
#   n = whether to mark the returned items as read
# --mark is an incrementing option, so repeating it would otherwise
# send n=2, n=3, ...; collapse it to a plain 0/1 flag.
# NOTE(review): assumes the service only understands n=0 and n=1 -- confirm.
#
my $url = sprintf
        "http://rpc.bloglines.com/getitems?s=%d&n=%d",
        $bloglines_sub,
        $bloglines_mark ? 1 : 0,
        ;

my $req = HTTP::Request->new(GET => $url );
$req->authorization_basic(
        $bloglines_user,
        $bloglines_passwd
);

my $ua = LWP::UserAgent->new(
        agent                   => 'rss2pdf',
);

# Stop with the server's status line on any failure (bad password,
# network trouble, ...) instead of carrying on and producing an empty
# document.
my $response = $ua->request( $req );
die "Unable to fetch $url: " . $response->status_line . "\n"
        unless $response->is_success;

# Keep only the body.  as_string would also put the status line and the
# response headers in front of the XML.
my $html = $response->content;

# Store a temp file.  This copy is not read back by the script, so a
# failure here is reported but does not stop the run.
my $xml_file = "/tmp/bloglines-feeds.xml";
if( open my $xml_fh, '>', $xml_file )
{
        print {$xml_fh} $html;
        close $xml_fh
                or warn "Unable to write $xml_file: $!\n";
}
else
{
        warn "Unable to open $xml_file: $!\n";
}


#
# Now that we have the RSS file from bloglines, let's generate our
# LaTeX file and start feeding it into it.
# One chapter per channel,
# One section per item
# 
my $latex_file = "/tmp/bloglines-feeds.tex";

# Three-argument open, so the mode can never be taken from the file
# name.  LATEX stays a bareword handle because the code further down
# prints to it and closes it under that name.
open LATEX, '>', $latex_file
        or die "Unable to open $latex_file: $!\n";

# The username is free-form text from the command line, so it must not
# be interpolated into the document verbatim:
#   $tex_user  LaTeX special characters escaped, used inside \title
#   $pdf_user  backslash, parentheses, braces, %, # and ~ removed, used
#              inside the \pdfinfo string where LaTeX escapes would not
#              be appropriate
my %tex_replacement = (
        '\\'    => '\\textbackslash{}',
        '~'     => '\\textasciitilde{}',
        '^'     => '\\textasciicircum{}',
);
( my $tex_user = $bloglines_user )
        =~ s/([\\~^])|([&%\$#_{}])/ defined $1 ? $tex_replacement{$1} : "\\$2" /ge;
( my $pdf_user = $bloglines_user ) =~ s/[\\(){}%#~]//g;

# NOTE(review): this heredoc interpolates, so each "\\" below is written
# to the file as a single backslash.  That means "Blogs for\\" ends the
# line with one backslash rather than a LaTeX "\\" line break; left as
# it was -- confirm which was intended.
print LATEX <<END_OF_TEX;
\\documentclass[12pt]{book}
\\usepackage[colorlinks]{hyperref}
\\usepackage{times}
\\usepackage[a6paper,vcentering,top=30pt,bottom=5pt,left=0pt,right=0pt]{geometry}
\\usepackage[Lenny]{fncychap}

\\begin{document}

% Enable 14-pt fonts (good size for Sony)
\\large

% Create the PDF meta data
\\pdfinfo {
        /Title          (Blogs for $pdf_user)
        /Author         (rss2pdf)
}

\\title{Blogs for\\
$tex_user}
\\author{Generated by rss2pdf}
\\maketitle


END_OF_TEX


# HTML-to-LaTeX converter used for every item description.
my $lp = HTML::Latex->new();

# Tags to turn off.  Only 'img' is banned at the moment; the other two
# are kept here, disabled, for reference.
$lp->ban_tag( 'img' );
#$lp->ban_tag( 'emph' );
#$lp->ban_tag( 'div' );


# The RSS parsers tried so far do not cope with several channels in a
# single feed.  Work around that by cutting each <channel> out of the
# download and wrapping it in the header and footer below, so the
# parser is always handed a one-channel document.

my $rss_header = <<'END_OF_RSS_HEADER';
<?xml version="1.0"?>
<rss version="2.0"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:bloglines="http://www.bloglines.com/services/module"
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<channel>
END_OF_RSS_HEADER

my $rss_footer = <<'END_OF_RSS_FOOTER';
</channel>
</rss>
END_OF_RSS_FOOTER

# Escape the characters LaTeX treats specially, so that plain text such
# as a blog or item title can be placed inside \chapter{} / \section{}.
# Everything is done in one pass so that the backslashes added for one
# character are never escaped again.  Returns '' for undef.
sub latex_escape
{
        my ($plain) = @_;
        return '' unless defined $plain;

        my %replacement = (
                '\\'    => '\\textbackslash{}',
                '~'     => '\\textasciitilde{}',
                '^'     => '\\textasciicircum{}',
        );
        $plain =~ s/([\\~^])|([&%\$#_{}])/ defined $1 ? $replacement{$1} : "\\$2" /ge;
        return $plain;
}

# Text content of the first node matching $path below $node, or '' when
# there is no such node (or it has no text).  Avoids calling
# text_content on undef for feeds that omit an element.
sub node_text
{
        my ($node, $path) = @_;

        my $found = $node->query( $path );
        my $content = defined $found ? $found->text_content : undef;
        return defined $content ? $content : '';
}

# Walk over every <channel> in the download.  Each one is wrapped in
# $rss_header / $rss_footer and parsed on its own, then written out as
# one chapter with one section per item.
while( $html =~ m!<channel>(.*?)</channel>!msg )
{
        my $new_html = $rss_header . $1 . $rss_footer;
        my $p = XML::RSS::Parser->new;
        my $feed = $p->parse_string( $new_html )
                or die "Unable to parse feed from bloglines?\n";

        my $blog = node_text( $feed, '/channel/title' );
        my $count = $feed->item_count;
        print STDERR "Blog='$blog' ($count entries)\n";

        # Channels whose title starts with "OSR -" are left out.
        next if $blog =~ /^OSR -/;

        # Titles are plain text: escape them before they reach LaTeX.
        my $tex_blog = latex_escape( $blog );
        print LATEX <<"END_OF_CHAPTER";
\\chapter{$tex_blog ($count)}
END_OF_CHAPTER

        for my $it ( $feed->query( '//item' ) )
        {
                my $title = node_text( $it, 'title' );
                print STDERR "\t$title\n";

                my $it_html = node_text( $it, 'description' );

                # Strip any bad bits (tag name in any letter case)
                $it_html =~ s!<script.*?/script>!!msgi;

                my $text = $lp->parse_string( $it_html );
                $text = '' unless defined $text;

                # Strip any \\ lines.  HTML::Latex adds them
                # for some reason
                $text =~ s/\\\\//g;
                # NOTE(review): as written this replaces "&" with "&",
                # i.e. it changes nothing.  The pattern may have lost an
                # HTML entity when the script was published -- confirm
                # what was intended before changing it.
                $text =~ s/\&/\&/g;
                $text =~ s/\\hline/\\hrule/g;

                my $tex_title = latex_escape( $title );
                print LATEX <<"END_OF_SECTION";
\\section{$tex_title}
$text
END_OF_SECTION
        }
}


print LATEX <<"END_OF_DOCUMENT";
\\end{document}
END_OF_DOCUMENT

# Buffered write errors only show up at close, so check it.
close LATEX
        or die "Unable to write $latex_file: $!\n";

#
# Run pdflatex twice to ensure that the references and TOC are
# correctly built.
#
# Not being able to start pdflatex at all is fatal.  A non-zero exit is
# only reported, so a document with TeX errors still gets both passes,
# as before.
#
for my $pass ( 1 .. 2 )
{
        system pdflatex => $latex_file;

        die "Unable to run pdflatex: $!\n"
                if $? == -1;
        warn "pdflatex pass $pass failed (wait status $?)\n"
                if $? != 0;
}

__END__