#!/usr/bin/env python
#-----------------------------------------
# $Id: fetch_rss.py,v 1.4 2006/03/22 05:47:54 migurski Exp $
#
# vim: ts=4 foldcolumn=4 foldmethod=marker
#
# This file is part of Reblog: http://reblog.org
# A derivative work of Feed On Feeds: http://feedonfeeds.com
#
# Distributed under the GNU Public License, see LICENSE
#
# Copyright 2004 Michael Frumin, Michal Migurski
# mike@stamen.com, http://stamen.com
# mfrumin@eyebeam.org, http://eyebeam.org
#
# Feed parser of last resort: if Reblog's usual Magpie parser is unable
# to make sense of an RSS source, Mark Pilgrim's Universal Feed Parser is
# called in for duty. This file accepts an RSS URL as an argument, and
# returns a representation of the RSS data in JSON format.
#-----------------------------------------
import sys
import urllib

# Each library directory is pushed to the front of sys.path *before* the
# matching import, so the module found under library/ takes precedence over
# any other module of the same name. The paths are relative, so they are
# resolved against the current working directory.
sys.path.insert(0, 'library/json-py')
import json

sys.path.insert(0, 'library/feedparser')
import feedparser
# Attribute maps. Every pair reads as (source, destination): the attribute
# name looked up on the parsed feed object, then the key it is stored under
# in the output structure.

# (source, destination) for each attribute of a feed
feed_attrs = [
    ('title', 'title'),
    ('link', 'link'),
    ('tagline', 'description'),
]

# (source, destination) for each attribute of an item
item_attrs = [
    ('id', 'guid'),
    ('link', 'link'),
    ('title', 'title'),
    ('summary', 'description'),
    ('author', 'author'),
    ('category', 'category'),
    ('modified', 'modified'),
]
# read the feed URL from the command line and pass it through the feedparser

# Without an argument sys.argv[-1] is this script's own path, which
# urllib.urlopen would happily open as a local file; refuse to run instead.
if len(sys.argv) < 2:
    sys.exit("Usage: %s <feed-url>" % sys.argv[0])

# The URL is taken from the last command-line argument.
feed_url = sys.argv[-1].strip()

# NOTE(review): Python 2's urllib.urlopen also opens local paths and file:
# URLs. If feed_url can come from an untrusted source, confirm the caller
# restricts it to the expected schemes.
try:
    feed_handle = urllib.urlopen(feed_url)
    try:
        feed_src = feed_handle.read()
    finally:
        # release the connection/file handle even if the read fails
        feed_handle.close()
    feed_obj = feedparser.parse(feed_src)
except IOError:
    sys.exit("Failed to retrieve feed from URL")
# create the final feed object
def _copy_attrs(source, attr_pairs):
    """Return a dict of UTF-8 encoded attribute values copied from source.

    source      -- object whose attributes are read with getattr()
    attr_pairs  -- list of (source attribute name, destination key) tuples

    Copying is best effort: an attribute that is missing, has no encode()
    method, or cannot be encoded is simply left out of the result.
    """
    copied = {}
    for (src, dest) in attr_pairs:
        try:
            copied[dest] = getattr(source, src).encode('UTF-8')
        except (AttributeError, LookupError, UnicodeError):
            # Skip only the failures expected from a missing or unusable
            # value; KeyboardInterrupt, SystemExit and genuine bugs still
            # propagate instead of being silently swallowed.
            continue
    return copied


feed = {
    # assign channel properties
    'channel': _copy_attrs(feed_obj.feed, feed_attrs),
    # assign item properties for each item
    'items': [_copy_attrs(entry_src, item_attrs)
              for entry_src in feed_obj.entries],
}
# return the result: serialise the assembled feed structure with the json
# module and write the resulting text to standard output
feed_json = json.write(feed)
sys.stdout.write(feed_json)