#!/usr/bin/env python
#-----------------------------------------
# $Id: fetch_rss.py,v 1.4 2006/03/22 05:47:54 migurski Exp $
#
# vim: ts=4 foldcolumn=4 foldmethod=marker
#
# This file is part of Reblog: http://reblog.org
# A derivative work of Feed On Feeds: http://feedonfeeds.com
#
# Distributed under the Gnu Public License, see LICENSE
#
# Copyright 2004 Michael Frumin, Michal Migurski
# mike@stamen.com, http://stamen.com
# mfrumin@eyebeam.org, http://eyebeam.org
#
# Feed parser of last resort: if Reblog's usual Magpie parser is unable
# to make sense of an RSS source, Mark Pilgrim's Universal Feed Parser is
# called in for duty. This file accepts an RSS URL as an argument, and
# returns a representation of the RSS data in JSON format.
#-----------------------------------------
import sys
import urllib

# The JSON writer and the feed parser are loaded from directories under
# library/. Each directory is inserted at the front of sys.path so that the
# copy found there takes precedence over any same-named installed module.
# NOTE(review): both paths are relative to the current working directory -
# presumably the caller runs this script from the Reblog root; confirm.
sys.path.insert(0, 'library/json-py')
import json

sys.path.insert(0, 'library/feedparser')
import feedparser

# (source, destination) for each attribute of a feed
feed_attrs = [('title', 'title'), ('link', 'link'), ('tagline', 'description')]

# (source, destination) for each attribute of an item
item_attrs = [('id', 'guid'), ('link', 'link'), ('title', 'title'),
              ('summary', 'description'), ('author', 'author'),
              ('category', 'category'), ('modified', 'modified')]


def _copy_attrs(source, attr_pairs):
    """Return a dict of the UTF-8 encoded attributes found on *source*.

    attr_pairs is a list of (source_name, destination_key) tuples. Each
    attribute is read from *source* by its source name and stored under its
    destination key. An attribute that is missing, or whose value cannot be
    encoded, is left out of the result rather than treated as an error.
    """
    copied = {}
    for (src, dest) in attr_pairs:
        try:
            copied[dest] = getattr(source, src).encode('UTF-8')
        except (AttributeError, KeyError, UnicodeError):
            # Best effort: skip absent attributes and values without a usable
            # .encode(). Only these errors are swallowed, so KeyboardInterrupt
            # and SystemExit still propagate.
            pass
    return copied


# Without an argument sys.argv[-1] would be this script's own path, so refuse
# to continue instead of trying to fetch the script itself as a feed.
if len(sys.argv) < 2:
    sys.exit("Usage: %s FEED_URL" % sys.argv[0])

# read the feed URL from the command line (the last argument is used) and
# pass it through the feedparser
feed_url = sys.argv[-1].strip()

try:
    response = urllib.urlopen(feed_url)
    try:
        feed_src = response.read()
    finally:
        # always release the connection, even if the read fails
        response.close()
    feed_obj = feedparser.parse(feed_src)
except IOError:
    # a string argument makes sys.exit print it to stderr and exit non-zero
    sys.exit("Failed to retrieve feed from URL")

# create the final feed object: the channel properties, plus one dict of
# item properties for each entry
feed = {'channel': _copy_attrs(feed_obj.feed, feed_attrs),
        'items': [_copy_attrs(entry_src, item_attrs)
                  for entry_src in feed_obj.entries]}

# return the result: the JSON text is written to standard output.
# json.write() is not the standard library API - presumably it is provided by
# the json module found in library/json-py; confirm.
sys.stdout.write(json.write(feed))