<?php
// vim: ts=4 foldcolumn=4 foldmethod=marker
/**
 * RF_Input_Controller class found here.
 *
 * This file is part of Reblog,
 * a derivative work of Feed On Feeds.
 *
 * Distributed under the Gnu Public License.
 *
 * @package Refeed
 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
 * @author Michal Migurski <mike@stamen.com>
 * @author Michael Frumin <mfrumin@eyebeam.org>
 * @copyright ©2004 Michael Frumin, Michal Migurski
 * @link http://reblog.org Reblog
 * @link http://feedonfeeds.com Feed On Feeds
 * @version $Revision: 1.20 $
 */

/**
 * GUID generator style flag: hash the feed URL, author & link when generating a GUID
 */
define('REBLOG_GUIDSTYLE_AUTHORLINK', 1);

/**
 * GUID generator style flag: hash the feed URL, author & title when generating a GUID
 */
define('REBLOG_GUIDSTYLE_AUTHORTITLE', 2);

/**
 * GUID generator style flag: hash the feed URL, author & content when generating a GUID
 */
define('REBLOG_GUIDSTYLE_AUTHORCONTENT', 4);

/**
 * Item status flag: brand-new item
 */
define('REBLOG_INPUT_NEW_ITEM', 5);

/**
 * Item status flag: old item with changed properties
 */
define('REBLOG_INPUT_UPDATED_ITEM', 6);

/**
 * Item status flag: old item with no changes
 */
define('REBLOG_INPUT_UNCHANGED_ITEM', 7);

/**
 * Item behavior flag: when true, items that have been modified are
 * treated like new items and marked unread again
 */
define('REBLOG_INPUT_FRESHEN_UPDATED_ITEMS', false);

/**
 * RF_Input_Controller handles all global feed subscription nuts & bolts.
 *
 * RF_Input_Controller tries to avoid user-specific subscription actions,
 * which should be handled by {@link RF_Controller RF_Controller} where possible.
 */
class RF_Input_Controller
{
   /**
    * Reference to operative controller object.
    * @var RF_Controller
    */
    var $controller;

   /**
    * Constructor.
    *
    * Declared as __construct() so that it still runs on PHP versions that
    * no longer treat a method named after the class as a constructor.
    * Terminates the script if $controller is not an RF_Controller instance.
    *
    * @param    RF_Controller   $controller     Operative controller object.
    *
    * @uses     RF_Input_Controller::$controller    Assigned on instantiation, from {@link RF_Controller $controller}.
    */
    function __construct(&$controller)
    {
        // is_object() first: get_class() must not be handed a non-object
        if(!is_object($controller) || strtolower(get_class($controller)) != strtolower('RF_Controller'))
            die("RF_Input_Controller must be instantiated with an RF_Controller.");

        $this->controller =& $controller;
    }

   /**
    * Old-style constructor, kept for PHP 4 and for code that calls it by name.
    *
    * @param    RF_Controller   $controller     Operative controller object.
    *
    * @uses     RF_Input_Controller::__construct()
    */
    function RF_Input_Controller(&$controller)
    {
        $this->__construct($controller);
    }

   /**
    * Subscribes a user to a feed, based on the feed's URL.
    *
    * First checks whether the requested feed exists for any user,
    * and adds it if not. Then checks whether $user is subscribed to
    * that feed, and subscribes $user if not.
    *
    * When the user is newly subscribed to a feed that already existed,
    * all of that feed's items are marked unread for the user.
    *
    * Note that the feed is marked published (when requested) before the
    * subscription itself is checked or created.
    *
    * @param    RF_User     $user       User to subscribe.
    * @param    string      $url        Location of feed data.
    * @param    boolean     $published  Whether to auto-publish this feed.
    * @param    array       $tags       Array of tags
    *
    * @return   mixed   {@link RF_Feed feed} on success, false otherwise.
    *
    * @uses     RF_Input_Controller::$controller    Passed to {@link RF_Userdata_Controller new RF_Userdata_Controller}.
    * @uses     RF_Controller::feedExistsWithURL()
    * @uses     RF_Input_Controller::subscribeToFeedURL()
    * @uses     RF_Userdata_Controller::RF_Userdata_Controller()
    * @uses     RF_Userdata_Controller::markFeedPublished()
    * @uses     RF_Controller::userSubscribedToFeed()
    * @uses     RF_Controller::subscribeUserToFeed()
    * @uses     RF_Input_Controller::updateFeed()
    * @uses     RF_Controller::getFeedItems()
    * @uses     RF_Userdata_Controller::markItemsUnread()
    * @uses     RF_Userdata_Controller::setFeedTags()
    */
    function subscribeUserToFeedURL(&$user, $url, $published=false, $tags=array())
    {
        $existing_feed = false;

        if($feed = $this->controller->feedExistsWithURL($url)) {
            print_trace('This feed already exists: '.$this->_traceLink($feed->url, $feed->title));
            $existing_feed = true;

        } elseif($feed = $this->subscribeToFeedURL($url)) {
            print_trace('This feed was added: '.$this->_traceLink($feed->url, $feed->title));

        } else {
            print_trace('This feed could not be added: '.$this->_traceLink($url, $url));
            return false;

        }

        $userdata_controller = new RF_Userdata_Controller($this->controller, $user);

        if($published)
            $userdata_controller->markFeedPublished($feed);

        if($this->controller->userSubscribedToFeed($user, $feed)) {
            print_trace('This feed is already subscribed to: '.$this->_traceLink($feed->url, $feed->title));

        } elseif($this->controller->subscribeUserToFeed($user, $feed)) {
            print_trace('This feed is now subscribed to: '.$this->_traceLink($feed->url, $feed->title));
            $this->updateFeed($feed);

            // an already-known feed yields no "new" items on update,
            // so show everything it holds to the new subscriber
            if($existing_feed)
                $userdata_controller->markItemsUnread($this->controller->getFeedItems($feed));

        } else {
            print_trace('This feed could not be subscribed to.');
            return false;

        }

        if($tags)
            $userdata_controller->setFeedTags($feed, $tags);

        return $feed;
    }

   /**
    * Subscribes a user to a batch of feeds, based on the feeds' URLs.
    *
    * Given the contents of an OPML document, tracks down each URL
    * referenced by xmlurl="..." and attempts to subscribe to it.
    * Only double-quoted attribute values are recognized, and empty
    * xmlurl="" attributes are skipped.
    *
    * @param    RF_User     $user       User to subscribe.
    * @param    string      $opml       OPML document containing feed URL's
    * @param    boolean     $published  Whether to auto-publish these feeds.
    * @param    array       $tags       Array of tags
    *
    * @return   boolean     True if the document contained at least one xmlurl attribute
    *                       (individual subscription failures are not reported), false otherwise.
    *
    * @uses     RF_Input_Controller::subscribeUserToFeedURL()
    */
    function subscribeUserToFeedsFromOPML(&$user, $opml, $published=false, $tags=array())
    {
        if(!preg_match_all('/xmlurl="(\w+:[^"]+)?"/mi', $opml, $matches))
            return false;

        foreach($matches[1] as $url) {
            // the capture group is optional, so xmlurl="" matches with nothing in it
            if($url === '')
                continue;

            $this->subscribeUserToFeedURL($user, $url, $published, $tags);
        }

        return true;
    }

   /**
    * Subscribes to a feed, based on the feed's URL.
    *
    * 1. Fetch RSS data using {@link RF_Input_Controller::fetchFeedURL() fetchFeedURL()}.
    * 2. If the feed is not valid, it might be a regular HTML page, which should be checked for links to feeds by {@link RF_Input_Controller::autodiscoverFeed() autodiscoverFeed()}.
    * 3. If autodiscovery finds a feed URL that is already known to {@link RF_Controller::feedExistsWithURL() RF_Controller::feedExistsWithURL()}, that existing feed is returned.
    * 4. Once a new valid feed has been found, save it to the feeds database.
    *
    * Note that this is not the same thing as subscribing a user to a feed,
    * which is performed by {@link RF_Input_Controller::subscribeUserToFeedURL() RF_Input_Controller::subscribeUserToFeedURL()}.
    *
    * @param    string  $url    Location of feed data, or of an HTML page linking to it.
    *
    * @return   mixed   {@link RF_Feed feed} on success, false otherwise.
    *
    * @uses     RF_Input_Controller::fetchFeedURL()
    * @uses     RF_Input_Controller::autodiscoverFeed()
    * @uses     RF_Controller::feedExistsWithURL()
    * @uses     RF_Feed::RF_Feed()
    * @uses     RF_Controller::saveFeed()
    * @uses     RF_Controller::invokePlugin()   "savedNewFeed" event,
    *                                           parameters: {@link RF_Feed $feed}.
    */
    function subscribeToFeedURL($url)
    {
        $rss = $this->fetchFeedURL($url);

        // empty() rather than a plain index read: $rss is false when parsing failed
        if(empty($rss['channel'])) {

            print_trace('URL is not RSS or is invalid: '.$this->_traceLink($url, $url));
            $this->_traceFetchFailure($rss, $url);

            print_trace('Attempting autodiscovery...');

            $r = _fetch_remote_file($url);
            $c = is_object($r) ? $r->results : false;

            if($c && $r->status >= 200 && $r->status < 300) {

                $l = $this->autodiscoverFeed($c, $url);

                if($l) {
                    print_trace('Autodiscovery found '.$this->_traceLink($l, $l).'.');

                    if($feed = $this->controller->feedExistsWithURL($l)) {
                        return $feed;
                    }

                    print_trace('Attempting to subscribe to '.$this->_traceLink($l, $l).'.');

                    $rss = $this->fetchFeedURL($l);

                    if(empty($rss['channel'])) {
                        print_trace('URL is not RSS or is invalid, giving up.');

                        // point the validator at the discovered URL, the one that just failed
                        $this->_traceFetchFailure($rss, $l);

                    } else {
                        $url = $l;

                    }
                } else {
                    print_trace('Autodiscovery failed, giving up.');
                }
            } else {
                print_trace("Can't load URL ".$this->_traceLink($url, $url).', giving up.');
            }
        }

        if(empty($rss['channel']))
            return false;

        $channel = $rss['channel'];

        $feed_args = array('url' => $url,
                           'title' => $this->_pick($channel, 'title'),
                           'link' => $this->_pick($channel, 'link'),
                           'description' => $this->_pick($channel, 'description'));

        $feed = new RF_Feed($feed_args);

        //$this->controller->invokePlugin('saveNewFeedBefore', array(&$feed));
        $this->controller->saveFeed($feed);
        $this->controller->invokePlugin('savedNewFeed', array(&$feed));

        return $feed;
    }

   /**
    * See whether any obvious links to feeds exist in an HTML document.
    * This method borrowed from {@link http://feedonfeeds.com Feed On Feeds}.
    *
    * Returns the target of the first <link> tag, in document order, whose
    * rel contains "alternate" and whose type is application/rss+xml,
    * application/atom+xml or text/xml. Relative targets are resolved
    * against $location (scheme, host and port are taken from it; "../"
    * segments are not collapsed).
    *
    * @param    string  $html       Complete HTML document.
    * @param    string  $location   URL from whence it came (for disambiguating relative links).
    *
    * @return   mixed   String feed URL or false if nothing found.
    *
    * @uses     RF_Input_Controller::_parseTagAttributes()
    * @uses     RF_Input_Controller::_absolutizeURL()
    */
    function autodiscoverFeed($html, $location)
    {
        if(!$html or !$location)
            return false;

        // collect the attribute portion of every <link> tag
        if(!preg_match_all('/<link\s+(.*?)\s*\/?'.'>/si', $html, $link_tags))
            return false;

        // text/xml is a kludge kept for feeds advertised the old way
        $feed_types = array('application/rss+xml', 'application/atom+xml', 'text/xml');

        foreach($link_tags[1] as $tag) {
            // parsed afresh for each tag, so attributes never carry over from a previous one
            $link = $this->_parseTagAttributes($tag);

            if(!isset($link['rel'], $link['type'], $link['href']))
                continue;

            $rels = preg_split('/\s+/', strtolower(trim($link['rel'])));

            if(!in_array('alternate', $rels, true))
                continue;

            if(!in_array(strtolower(trim($link['type'])), $feed_types, true))
                continue;

            // attribute values are HTML-encoded, e.g. "&amp;" between query arguments
            $href = trim(html_entity_decode($link['href'], ENT_QUOTES));

            if($href === '')
                continue;

            $full_url = $this->_absolutizeURL($href, $location);

            if($full_url !== false)
                return $full_url;
        }

        return false;
    }

   /**
    * Fetch RSS data from a feed by URL.
    *
    * 1. Try Magpie, the native PHP strict parser.
    * 2. If Magpie fails, try a much more forgiving Python parser.
    * 3. Give up.
    *
    * Side effect: error reporting is lowered to E_ERROR and is not restored.
    *
    * @param    string  $url    URL of feed.
    *
    * @return   mixed   Two element associative array or false if parsing failed.
    *                   Array elements:
    *                   - "channel": information about feed.
    *                   - "items": array of entries.
    *
    * @uses     get_configured_magpiestuff()    Assigned to MAGPIE_USER_AGENT and MAGPIE_CACHE_AGE
    * @uses     get_configured_cache_dir()      Assigned to MAGPIE_CACHE_DIR
    * @uses     RF_Input_Controller::_findPython()
    */
    function fetchFeedURL($url)
    {
        /*
            First, we try to fetch the feed with Magpie. It's faster,
            because we're already in PHP. It's also very pedantic, and
            may choke on common errors such as unescaped ampersands.
        */

        list($user_agent, $cache_age) = get_configured_magpiestuff();

        $magpie_settings = array('MAGPIE_USER_AGENT' => $user_agent,
                                 'MAGPIE_CACHE_AGE' => $cache_age,
                                 'MAGPIE_CACHE_DIR' => get_configured_cache_dir(),
                                 // Turn GZip on in Magpie
                                 'MAGPIE_USE_GZIP' => true,
                                 'MAGPIE_CACHE_ON' => 0);

        // this method runs more than once per request; constants can only be defined once
        foreach($magpie_settings as $name => $value)
            if(!defined($name))
                define($name, $value);

        // suppress magpie's warnings, we'll handle those ourselves
        // NOTE(review): this stays in effect for the rest of the request - confirm that is intended
        error_reporting(E_ERROR);

        require_once('rss_fetch.inc');

        $rss_feed = fetch_rss($url);

        if(is_object($rss_feed) && $rss_feed->channel) {
            // Magpie was able to retrieve a valid feed, huzzah!
            return array('channel' => $rss_feed->channel, 'items' => $rss_feed->items);

        }

        /*
            Magpie was NOT able to retrieve a valid feed, so we'll try a
            much more accepting parser written in Python. This will be
            slow, but much more likely to find something useful.
        */

        $python = $this->_findPython();

        if(!$python) {
            // no Python love.
            return false;

        }

        require_once('library/JSON-PHP/JSON.php');
        $json = new Services_JSON(SERVICES_JSON_LOOSE_TYPE);

        $raw = shell_exec("{$python} ".escapeshellarg(dirname(__FILE__).'/../../fetch_rss.py').' '.escapeshellarg($url));
        $rss_feed = $json->decode($raw);

        if(is_array($rss_feed) && !empty($rss_feed['channel'])) {
            // UFP was able to retrieve a valid feed
            return $rss_feed;

        }

        /*
            Oh well.
        */

        return false;
    }

   /**
    * Update a feed from freshly-parsed RSS data.
    *
    * Update a feed's channel information, then update each item
    * represented in the parsed RSS data. Items that have been modified
    * are sent away for freshening.
    *
    * @param    RF_Feed     $feed       Feed we're updating.
    * @param    mixed       $rss_feed   Parsed RSS data from {@link RF_Input_Controller::fetchFeedURL() fetchFeedURL()}, if any.
    *                                   Fetched from the feed's URL when null.
    *
    * @return   mixed   Nothing on success. When no RSS data is available, an associative array:
    *                   - "new": 0
    *                   - "del": null
    *                   - "err": HTML error message.
    *
    * @uses     RF_Input_Controller::fetchFeedURL()
    * @uses     RF_Feed::RF_Feed()
    * @uses     RF_Controller::saveFeed()
    * @uses     RF_Input_Controller::prepareItemArguments()
    * @uses     REBLOG_GUIDSTYLE_AUTHORLINK
    * @uses     REBLOG_GUIDSTYLE_AUTHORTITLE
    * @uses     REBLOG_GUIDSTYLE_AUTHORCONTENT
    * @uses     RF_Input_Controller::updateItem()
    * @uses     REBLOG_INPUT_UPDATED_ITEM
    * @uses     REBLOG_INPUT_NEW_ITEM
    * @uses     REBLOG_INPUT_FRESHEN_UPDATED_ITEMS
    * @uses     print_trace()
    * @uses     RF_Controller::freshenFeedItems()
    * @uses     RF_Controller::flushObsoleteFeedItems()
    * @uses     RF_Controller::invokePlugin()   "savedExistingFeed" event,
    *                                           parameters: {@link RF_Feed $feed}.
    * @uses     RF_Controller::invokePlugin()   "updatedFeed" event,
    *                                           parameters: {@link RF_Feed $feed}.
    */
    function updateFeed(&$feed, $rss_feed=null)
    {
        if(is_null($rss_feed))
            $rss_feed = $this->fetchFeedURL($feed->url);

        if(!$rss_feed) {
            // magpie is only loaded once fetchFeedURL() has run
            $error = function_exists('magpie_error') ? (string) magpie_error() : '';

            return array('new' => 0,
                         'del' => null, //$feed->purge(),
                         'err' => sprintf('Error: <b>%s</b> <a href="http://feeds.archive.org/validator/check?url=%s">try to validate it?</a>', htmlspecialchars($error), urlencode($feed->url)));
        }

        if(!empty($rss_feed['channel'])) {
            $channel = $rss_feed['channel'];

            // calls the feed's constructor, so that new properties are folded in
            $feed->RF_Feed(array('url' => $feed->url,
                                 'title' => $this->_pick($channel, 'title'),
                                 'link' => $this->_pick($channel, 'link'),
                                 'description' => $this->_pick($channel, 'description'),
                                 'xml' => $this->_pick($channel, 'xml')));

            $this->controller->saveFeed($feed);
            $this->controller->invokePlugin('savedExistingFeed', array(&$feed));
        }

        // Prepare item arguments from RSS feed contents
        $items_args = array();
        $items_guids = array();
        $items_link_authors = array();
        $items_title_authors = array();

        $rss_items = $this->_pick($rss_feed, 'items', array());

        if(!is_array($rss_items))
            $rss_items = array();

        foreach($rss_items as $rss_item) {
            $item_args = $this->prepareItemArguments($feed, $rss_item);
            $author = $this->_pick($item_args, 'author');

            $items_args[] = $item_args;

            // These will be useful to determine our GUID style
            $items_link_authors[] = $this->_pick($item_args, 'link').', '.$author;
            $items_title_authors[] = $this->_pick($item_args, 'title').', '.$author;

            if(!empty($item_args['guid']))
                $items_guids[] = $item_args['guid'];
        }

        // if there are GUID's defined and they're not all unique,
        // scrub them all away because they are worse than useless.
        if(count($items_guids) > count(array_unique($items_guids)))
            foreach(array_keys($items_args) as $i)
                unset($items_args[$i]['guid']);

        // Set GUID style based on repetition in feed
        // Assume that the item link will be sufficient
        $guid_style = REBLOG_GUIDSTYLE_AUTHORLINK;

        if(count($items_link_authors) > count(array_unique($items_link_authors))) {
            // If duplicate links & authors are found, try using the title
            $guid_style = REBLOG_GUIDSTYLE_AUTHORTITLE;

            // If duplicate *titles* and authors are found, try using the content
            // smart feed publishers won't make us stoop to this level
            if(count($items_title_authors) > count(array_unique($items_title_authors)))
                $guid_style = REBLOG_GUIDSTYLE_AUTHORCONTENT;
        }

        $found_items = array();
        $updated_items = array();

        foreach($items_args as $item_args) {

            list($updated, $item) = $this->updateItem($feed, $item_args, $guid_style);

            // a plugin may have emptied the item to veto it
            if(empty($item))
                continue;

            $found_items[] = $item;

            if($updated == REBLOG_INPUT_NEW_ITEM || (REBLOG_INPUT_FRESHEN_UPDATED_ITEMS && $updated == REBLOG_INPUT_UPDATED_ITEM))
                $updated_items[] = $item;
        }

        $feed->usage['updated'] = count($updated_items);

        print_trace(intval($feed->usage['updated']).' items have been updated or added.');

        $this->controller->freshenFeedItems($feed, $updated_items);
        $this->controller->flushObsoleteFeedItems($feed, $found_items);
        $this->controller->invokePlugin('updatedFeed', array(&$feed));
    }

   /**
    * Prepare item arguments from parsed RSS data.
    *
    * Later sources override earlier ones:
    * - guid: guid, then about, then id (each only if still empty), finally dc:identifier.
    * - modified: pubdate, then modified, then dc:date.
    * - author: author, then author_name, then dc:creator.
    * - category: category, then dc:subject.
    *
    * The "guid" element is always present in the result; it is an empty
    * string when the feed supplied nothing usable.
    *
    * @param    RF_Feed     $feed       Feed where the item will be assigned.
    * @param    array       $item_rss   Parsed RSS item data from {@link RF_Input_Controller::fetchFeedURL() fetchFeedURL()}.
    *
    * @return   array   Array of item args, suitable for passing to {@link RF_Item RF_Item constructor}
    *
    * @uses     ref_rfc8222unix_timestamp()
    * @uses     ref_iso86012unix_timestamp()
    * @uses     RF_Input_Controller::truncateGUID()
    * @uses     RF_Controller::invokePlugin()   "preparedItemArguments" event,
    *                                           parameters: {@link RF_Feed $feed}, array $prepared_args, array $rss_data.
    */
    function prepareItemArguments(&$feed, $item_rss)
    {
        $item_args = array('feed_id' => $feed->getID(),
                           'guid' => $this->_pick($item_rss, 'guid'),
                           'link' => $this->_pick($item_rss, 'link'),
                           'title' => $this->_pick($item_rss, 'title'),
                           'content' => $this->_pick($item_rss, 'description'),
                           'xml' => $this->_pick($item_rss, 'xml'));

        /*
        // feedburner.origlink is set, link = feedburner.origlink
        if(!empty($item_rss['feedburner']['origlink']))
            $item_args['link'] = $item_rss['feedburner']['origlink'];
        */

        // if link is empty but guid isn't, link = guid
        if(empty($item_args['link']) && !empty($item_args['guid']))
            $item_args['link'] = $item_args['guid'];

        // if guid is empty but about (rdf) isn't, guid = about
        if(empty($item_args['guid']) && !empty($item_rss['about']))
            $item_args['guid'] = $item_rss['about'];

        // if guid is empty but id isn't, guid = id
        if(empty($item_args['guid']) && !empty($item_rss['id']))
            $item_args['guid'] = $item_rss['id'];

        // if atom_content isn't empty, content = atom_content
        if(!empty($item_rss['atom_content']))
            $item_args['content'] = $item_rss['atom_content'];

        // if title is empty, generate one
        if(empty($item_args['title']))
            $item_args['title'] = '[Untitled]';

        // if pubdate is valid, modified = pubdate
        if(!empty($item_rss['pubdate']) && ($modified = ref_rfc8222unix_timestamp($item_rss['pubdate'])))
            $item_args['modified'] = $modified;

        // if category exists, category = category
        if(!empty($item_rss['category']))
            $item_args['category'] = $item_rss['category'];

        // if modified is valid, modified = modified
        if(!empty($item_rss['modified']) && ($modified = ref_iso86012unix_timestamp($item_rss['modified'])))
            $item_args['modified'] = $modified;

        // if dc:date is valid, modified = dc:date
        if(!empty($item_rss['dc']['date']) && ($modified = ref_iso86012unix_timestamp($item_rss['dc']['date'])))
            $item_args['modified'] = $modified;

        // if author exists, author = author
        if(!empty($item_rss['author']))
            $item_args['author'] = $item_rss['author'];

        // if author_name exists, author = author_name
        if(!empty($item_rss['author_name']))
            $item_args['author'] = $item_rss['author_name'];

        // if dc:creator exists, author = dc:creator
        if(!empty($item_rss['dc']['creator']))
            $item_args['author'] = $item_rss['dc']['creator'];

        // if dc:subject exists, category = dc:subject
        if(!empty($item_rss['dc']['subject']))
            $item_args['category'] = $item_rss['dc']['subject'];

        // if dc:identifier exists, guid = dc:identifier
        if(!empty($item_rss['dc']['identifier']))
            $item_args['guid'] = $item_rss['dc']['identifier'];

        // keep the key in place either way; an empty guid is replaced later by a generated one
        $item_args['guid'] = empty($item_args['guid']) ? '' : $this->truncateGUID($item_args['guid']);

        $this->controller->invokePlugin('preparedItemArguments', array(&$feed, &$item_args, &$item_rss));

        return $item_args;
    }

   /**
    * Truncates a GUID for the VARCHAR column
    *
    * Cuts at 255 bytes. Always returns a string, also for empty input.
    *
    * @param    string  $guid   One GUID
    *
    * @return   string  Shorter GUID
    */
    function truncateGUID($guid)
    {
        $guid = (string) $guid;

        // NOTE(review): byte-wise cut; may split a multibyte character - confirm column charset
        return (strlen($guid) > 255) ? substr($guid, 0, 255) : $guid;
    }

   /**
    * Invent a new, hopefully-unique GUID for an item.
    *
    * Prepend the GUID prefix to an md5 hash of item properties.
    * $guid_style argument determines which properties are used:
    * always the feed URL and author, plus the link (ideal),
    * entry title, or entry content. An unrecognized style is
    * treated like REBLOG_GUIDSTYLE_AUTHORLINK.
    *
    * In a perfect world, these will be truly globally-unique, suitable for
    * transfer between installations of Refeed.
    *
    * @param    RF_Feed     $feed           Feed where the item will be assigned.
    * @param    array       $item_args      Various item properties in associative array form.
    * @param    int         $guid_style     Flag: how will we generate the GUID?
    *
    * @return   string  Hopefully-unique GUID suitable for passing on outgoing feeds.
    *
    * @uses     RF_Input_Controller::truncateGUID()
    * @uses     get_configured_guid_prefix()    Prefixed to generated GUID hashes
    * @uses     REBLOG_GUIDSTYLE_AUTHORLINK
    * @uses     REBLOG_GUIDSTYLE_AUTHORTITLE
    * @uses     REBLOG_GUIDSTYLE_AUTHORCONTENT
    */
    function generateGUID(&$feed, $item_args, $guid_style)
    {
        switch($guid_style) {
            case REBLOG_GUIDSTYLE_AUTHORTITLE:
                $property = 'title';
                break;

            case REBLOG_GUIDSTYLE_AUTHORCONTENT:
                $property = 'content';
                break;

            case REBLOG_GUIDSTYLE_AUTHORLINK:
            default:
                $property = 'link';
        }

        // missing values hash as empty strings, which keeps previously generated GUIDs stable
        $hashed = array($feed->url,
                        $this->_pick($item_args, 'author', ''),
                        $this->_pick($item_args, $property, ''));

        return $this->truncateGUID(get_configured_guid_prefix().md5(join('-', $hashed)));
    }

   /**
    * Find an existing item based on parsed RSS data.
    *
    * @param    RF_Feed     $feed           Feed where the item will be assigned.
    * @param    array       $item_args      Array of item args, suitable for passing to {@link RF_Item RF_Item constructor}
    * @param    int         $guid_style     How will we generate the GUID?
    *
    * @return   mixed   Result of {@link RF_Controller::itemExistsWithGUID() itemExistsWithGUID()}:
    *                   {@link RF_Item RF_Item} if such an item was found, false otherwise.
    *
    * @uses     RF_Input_Controller::generateGUID()
    * @uses     RF_Controller::itemExistsWithGUID()
    */
    function existingItem(&$feed, $item_args, $guid_style)
    {
        $guid = empty($item_args['guid'])
            ? $this->generateGUID($feed, $item_args, $guid_style)   // if guid is empty, generate one
            : $item_args['guid'];

        return $this->controller->itemExistsWithGUID($feed, $guid);
    }

   /**
    * Apply changes to an item.
    *
    * Compares link, title, content, author and category, ignoring case
    * and surrounding whitespace. A property counts as changed when the
    * two values differ and are not both empty; changed properties are
    * written to $item.
    *
    * @param    RF_Item     $item   Item where changes may be written
    * @param    array       $args   Array of item arguments, to be compared against item properties
    *
    * @return   boolean     True if any item properties have been modified
    *
    * @uses     print_trace()
    */
    function itemChanges(&$item, $args)
    {
        $changed_properties = array();

        foreach(array('link', 'title', 'content', 'author', 'category') as $property) {
            $old_value = $item->$property;
            $new_value = $this->_pick($args, $property);

            if(empty($old_value) && empty($new_value))
                continue;

            // strict comparison: numeric-looking strings such as "1e3" and "1000" must count as different
            if(trim(strtolower((string) $old_value)) !== trim(strtolower((string) $new_value))) {
                $item->$property = $new_value;
                $changed_properties[] = $property;
            }
        }

        if(!count($changed_properties))
            return false;

        print_trace(sprintf('Item %d (%s) has been changed (%s).', $item->getID(), htmlspecialchars((string) $this->_pick($args, 'guid', '')), htmlspecialchars(join(', ', $changed_properties))));

        return true;
    }

   /**
    * Update an item
    *
    * Saves a changed existing item or a brand-new one. The "...Before"
    * plugin events receive the item by reference; if a plugin empties it,
    * the item is not saved.
    *
    * @param    RF_Feed     $feed           Feed where the item will be assigned.
    * @param    array       $item_args      Array of item args, suitable for passing to {@link RF_Item RF_Item constructor}
    * @param    int         $guid_style     How will we generate the GUID?
    *
    * @return   array   Two element array:
    *                   - element 0: status flag, one of REBLOG_INPUT_NEW_ITEM,
    *                                REBLOG_INPUT_UPDATED_ITEM or REBLOG_INPUT_UNCHANGED_ITEM.
    *                   - element 1: {@link RF_Item RF_Item} associated item object.
    *
    * @uses     RF_Controller::itemExistsWithGUID()
    * @uses     print_trace()
    * @uses     RF_Input_Controller::itemChanges()
    * @uses     RF_Controller::saveItem()
    * @uses     RF_Input_Controller::generateGUID()
    * @uses     RF_Controller::invokePlugin()   "saveModifiedItemBefore" event,
    *                                           parameters: {@link RF_Item $item}.
    * @uses     RF_Controller::invokePlugin()   "savedModifiedItem" event,
    *                                           parameters: {@link RF_Item $item}.
    * @uses     RF_Controller::invokePlugin()   "saveNewItemBefore" event,
    *                                           parameters: {@link RF_Item $item}.
    * @uses     RF_Controller::invokePlugin()   "savedNewItem" event,
    *                                           parameters: {@link RF_Item $item}.
    * @uses     REBLOG_INPUT_UPDATED_ITEM
    * @uses     REBLOG_INPUT_UNCHANGED_ITEM
    * @uses     REBLOG_INPUT_NEW_ITEM
    */
    function updateItem(&$feed, $item_args, $guid_style)
    {
        // if guid is empty, generate one
        if(empty($item_args['guid']))
            $item_args['guid'] = $this->generateGUID($feed, $item_args, $guid_style);

        $guid_html = htmlspecialchars((string) $item_args['guid']);

        $item_existing = $this->controller->itemExistsWithGUID($feed, $item_args['guid']);

        if($item_existing) {

            print_trace('Item '.$guid_html.' already exists.');

            if(!$this->itemChanges($item_existing, $item_args))
                return array(REBLOG_INPUT_UNCHANGED_ITEM, $item_existing);

            $this->controller->invokePlugin('saveModifiedItemBefore', array(&$item_existing));

            if(!empty($item_existing)) {
                $this->controller->saveItem($item_existing);
                $this->controller->invokePlugin('savedModifiedItem', array(&$item_existing));
            }

            return array(REBLOG_INPUT_UPDATED_ITEM, $item_existing);
        }

        $item_new = new RF_Item($item_args);

        $this->controller->invokePlugin('saveNewItemBefore', array(&$item_new));

        if(!empty($item_new)) {
            $this->controller->saveItem($item_new);
            $this->controller->invokePlugin('savedNewItem', array(&$item_new));

            // NOTE(review): written straight to the output, unlike the print_trace() calls elsewhere - confirm intended
            printf('<p>Item %s has been added.</p>', $guid_html);
        }

        # I think this is superfluous, not sure in resolving CVS conflict
        # print_trace('Item '.htmlspecialchars($item_args['guid']).' has been added.');

        return array(REBLOG_INPUT_NEW_ITEM, $item_new);
    }

   /**
    * Read one element of an array without tripping over missing keys.
    *
    * @access   private
    *
    * @param    mixed   $source     Array to read from (anything else yields $default).
    * @param    mixed   $key        Key to look up.
    * @param    mixed   $default    Value returned when the key is missing or null.
    *
    * @return   mixed   Element value or $default.
    */
    function _pick($source, $key, $default=null)
    {
        return (is_array($source) && isset($source[$key])) ? $source[$key] : $default;
    }

   /**
    * Build an HTML anchor for trace output, escaping both target and label.
    *
    * @access   private
    *
    * @param    string  $href   Link target.
    * @param    string  $label  Link text.
    *
    * @return   string  HTML anchor element.
    */
    function _traceLink($href, $label)
    {
        return sprintf('<a href="%s">%s</a>', htmlspecialchars((string) $href), htmlspecialchars((string) $label));
    }

   /**
    * Trace why a URL could not be read as a feed.
    *
    * Prints the Magpie error when nothing at all was parsed, then a link
    * to the feed validator for $url.
    *
    * @access   private
    *
    * @param    mixed   $rss    Result of {@link RF_Input_Controller::fetchFeedURL() fetchFeedURL()}.
    * @param    string  $url    URL that was fetched.
    *
    * @uses     print_trace()
    */
    function _traceFetchFailure($rss, $url)
    {
        if(!$rss)
            print_trace(sprintf('Error: <strong>%s</strong>', htmlspecialchars((string) magpie_error())));

        print_trace(sprintf('<a href="http://feeds.archive.org/validator/check?url=%s">The validator may give more information.</a>', urlencode($url)));
    }

   /**
    * Split the inside of an HTML tag into its attributes.
    *
    * Handles double-quoted, single-quoted and unquoted values; attributes
    * without a value are ignored. When an attribute is repeated, the last
    * occurrence wins.
    *
    * @access   private
    *
    * @param    string  $tag    Attribute portion of a tag, e.g. 'rel="alternate" href="/feed"'.
    *
    * @return   array   Attribute values keyed by lowercased attribute name.
    */
    function _parseTagAttributes($tag)
    {
        $attributes = array();
        $pattern = '/([^\s=\/"\'<>]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/';

        if(!preg_match_all($pattern, $tag, $pairs, PREG_SET_ORDER))
            return $attributes;

        foreach($pairs as $pair) {
            $value = '';

            // exactly one of the three value groups can hold text
            for($group = 2; $group <= 4; $group++) {
                if(isset($pair[$group]) && $pair[$group] !== '') {
                    $value = $pair[$group];
                    break;
                }
            }

            $attributes[strtolower($pair[1])] = $value;
        }

        return $attributes;
    }

   /**
    * Turn a link target into a full URL, using the page it was found on.
    *
    * @access   private
    *
    * @param    string  $href       Non-empty link target: absolute, protocol-relative ("//host/..."),
    *                               root-relative ("/...") or relative to the page's directory.
    * @param    string  $location   URL of the page containing the link.
    *
    * @return   mixed   String URL, or false if $href is relative and $location has no host.
    */
    function _absolutizeURL($href, $location)
    {
        // anything starting with "scheme://" is already absolute, https included
        if(preg_match('/^[a-z][a-z0-9+.\-]*:\/\//i', $href))
            return $href;

        $base = parse_url($location);

        if(!is_array($base) || empty($base['host']))
            return false;

        $scheme = empty($base['scheme']) ? 'http' : $base['scheme'];

        if(substr($href, 0, 2) == '//')
            return $scheme.':'.$href;

        $root = $scheme.'://'.$base['host'];

        if(isset($base['port']))
            $root .= ':'.$base['port'];

        if(substr($href, 0, 1) == '/')
            return $root.$href;

        // directory of the page: everything up to and including the last slash,
        // so "/blog/" stays "/blog/" and "/blog/index.html" becomes "/blog/"
        $path = empty($base['path']) ? '/' : $base['path'];
        $slash = strrpos($path, '/');
        $directory = ($slash === false) ? '/' : substr($path, 0, $slash + 1);

        return $root.$directory.$href;
    }

   /**
    * Find a Python interpreter able to run Python 2 style print statements.
    *
    * The result is remembered for the rest of the request, so the shell
    * is probed at most once.
    *
    * @access   private
    *
    * @return   mixed   Command name ("python2" or "python"), or false if none works
    *                   or shell_exec() is unavailable.
    */
    function _findPython()
    {
        static $python = null;

        if(!is_null($python))
            return $python;

        $python = false;

        if(!function_exists('shell_exec'))
            return $python;

        foreach(array('python2', 'python') as $candidate) {
            if(trim((string) shell_exec($candidate.' -c "print \'hello world\'"')) == 'hello world') {
                $python = $candidate;
                break;
            }
        }

        return $python;
    }
}

?>