1
0
mirror of https://github.com/chylex/Nextcloud-News.git synced 2025-04-12 19:15:43 +02:00

Possible backwards incompatible change by using the link provided by simplepie instead of the user for the url hash. This prevents duplication of the feed when adding a slightly different feed url which points to the same feed and allows a speedup from O(n) to O(1) for article enhanchers

This commit is contained in:
Bernhard Posselt 2013-08-28 19:19:28 +02:00
parent a9eb72911b
commit 2f67340e55
12 changed files with 109 additions and 215 deletions

View File

@ -1,7 +1,7 @@
owncloud-news (1.207)
owncloud-news (1.401)
* Add possibility to hook up article enhancers which fetch article content directly from the web page
* Add article enhancer for explosm.net to directly fetch comics
* Possible backwards incompatible change by using the link provided by simplepie instead of the user for the url hash. This prevents duplication of the feed when adding a slightly different feed url which points to the same feed and allows a speedup from O(n) to O(1) for article enhanchers
owncloud-news (1.206)
* Also handle URLErrors in updater script that are thrown when the domain of a feed is not found

View File

@ -94,15 +94,16 @@ class FeedBusinessLayer extends BusinessLayer {
*/
public function create($feedUrl, $folderId, $userId){
// first try if the feed exists already
try {
$this->mapper->findByUrlHash(md5($feedUrl), $userId);
throw new BusinessLayerExistsException(
$this->api->getTrans()->t('Can not add feed: Exists already'));
} catch(DoesNotExistException $ex){}
try {
list($feed, $items) = $this->feedFetcher->fetch($feedUrl);
// try again if feed exists depending on the reported link
try {
$this->mapper->findByUrlHash($feed->getUrlHash(), $userId);
throw new BusinessLayerExistsException(
$this->api->getTrans()->t('Can not add feed: Exists already'));
} catch(DoesNotExistException $ex){}
// insert feed
$feed->setFolderId($folderId);
$feed->setUserId($userId);
@ -123,7 +124,7 @@ class FeedBusinessLayer extends BusinessLayer {
continue;
} catch(DoesNotExistException $ex){
$unreadCount += 1;
$item = $this->enhancer->enhance($item);
$item = $this->enhancer->enhance($item, $feed->getLink());
$this->itemMapper->insert($item);
}
}
@ -189,7 +190,8 @@ class FeedBusinessLayer extends BusinessLayer {
try {
$this->itemMapper->findByGuidHash($item->getGuidHash(), $feedId, $userId);
} catch(DoesNotExistException $ex){
$item = $this->enhancer->enhance($item);
$item = $this->enhancer->enhance($item,
$existingFeed->getLink());
$this->itemMapper->insert($item);
}
}

View File

@ -57,7 +57,6 @@ use \OCA\News\Utility\Updater;
use \OCA\News\Utility\SimplePieFileFactory;
use \OCA\News\Utility\ArticleEnhancer\Enhancer;
use \OCA\News\Utility\ArticleEnhancer\DefaultEnhancer;
use \OCA\News\Utility\ArticleEnhancer\CyanideAndHappinessEnhancer;
@ -234,8 +233,7 @@ class DIContainer extends BaseContainer {
// register fetchers in order
// the most generic enhancer should be the last one
$enhancer->registerEnhancer($c['CyanideAndHappinessEnhancer']);
$enhancer->registerEnhancer($c['DefaultEnhancer']);
$enhancer->registerEnhancer('explosm.net', $c['CyanideAndHappinessEnhancer']);
return $enhancer;
});

View File

@ -105,10 +105,6 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
$this->api->expects($this->once())
->method('getTrans')
->will($this->returnValue($trans));
$this->feedMapper->expects($this->once())
->method('findByUrlHash')
->with($this->equalTo(md5($url)), $this->equalTo($this->user))
->will($this->throwException(new DoesNotExistException('yo')));
$this->fetcher->expects($this->once())
->method('fetch')
->with($this->equalTo($url))
@ -123,6 +119,8 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
$createdFeed = new Feed();
$ex = new DoesNotExistException('yo');
$createdFeed->setUrl($url);
$createdFeed->setUrlHash('hsssi');
$createdFeed->setLink($url);
$item1 = new Item();
$item1->setGuidHash('hi');
$item2 = new Item();
@ -134,7 +132,7 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
$this->feedMapper->expects($this->once())
->method('findByUrlHash')
->with($this->equalTo(md5($url)), $this->equalTo($this->user))
->with($this->equalTo($createdFeed->getUrlHash()), $this->equalTo($this->user))
->will($this->throwException($ex));
$this->fetcher->expects($this->once())
->method('fetch')
@ -153,7 +151,8 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
->will($this->throwException($ex));
$this->enhancer->expects($this->at(0))
->method('enhance')
->with($this->equalTo($return[1][1]))
->with($this->equalTo($return[1][1]),
$this->equalTo($url))
->will($this->returnValue($return[1][1]));
$this->itemMapper->expects($this->at(1))
->method('insert')
@ -167,7 +166,8 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
->will($this->throwException($ex));
$this->enhancer->expects($this->at(1))
->method('enhance')
->with($this->equalTo($return[1][0]))
->with($this->equalTo($return[1][0]),
$this->equalTo($url))
->will($this->returnValue($return[1][0]));
$this->itemMapper->expects($this->at(3))
->method('insert')
@ -183,9 +183,11 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
public function testCreateItemGuidExistsAlready(){
$url = 'http://test';
$folderId = 10;
$createdFeed = new Feed();
$ex = new DoesNotExistException('yo');
$createdFeed = new Feed();
$createdFeed->setUrl($url);
$createdFeed->setUrlHash($url);
$createdFeed->setLink($url);
$item1 = new Item();
$item1->setGuidHash('hi');
$item2 = new Item();
@ -197,7 +199,8 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
$this->feedMapper->expects($this->once())
->method('findByUrlHash')
->with($this->equalTo(md5($url)), $this->equalTo($this->user))
->with($this->equalTo($createdFeed->getUrlHash()),
$this->equalTo($this->user))
->will($this->throwException($ex));
$this->fetcher->expects($this->once())
->method('fetch')
@ -216,7 +219,8 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
->will($this->throwException($ex));
$this->enhancer->expects($this->at(0))
->method('enhance')
->with($this->equalTo($return[1][1]))
->with($this->equalTo($return[1][1]),
$this->equalTo($url))
->will($this->returnValue($return[1][1]));
$this->itemMapper->expects($this->at(1))
->method('insert')
@ -240,6 +244,7 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
$feed = new Feed();
$feed->setId(3);
$feed->getUrl('test');
$feed->setUrlHash('yo');
$item = new Item();
$item->setGuidHash(md5('hi'));
@ -268,7 +273,8 @@ class FeedBusinessLayerTest extends \OCA\AppFramework\Utility\TestUtility {
->will($this->throwException($ex));
$this->enhancer->expects($this->at(0))
->method('enhance')
->with($this->equalTo($items[0]))
->with($this->equalTo($items[0]),
$this->equalTo($feed->getUrl()))
->will($this->returnValue($items[0]));
$this->itemMapper->expects($this->once())
->method('insert')

View File

@ -236,13 +236,13 @@ class FeedFetcherTest extends \OCA\AppFramework\Utility\TestUtility {
private function createFeed($hasFeedFavicon=false, $hasWebFavicon=false) {
$this->expectCore('get_title', $this->feedTitle);
$this->expectCore('get_link', $this->feedLink);
$this->expectCore('get_permalink', $this->feedLink);
$feed = new Feed();
$feed->setTitle(html_entity_decode($this->feedTitle));
$feed->setUrl($this->url);
$feed->setLink($this->feedLink);
$feed->setUrlHash(md5($this->url));
$feed->setUrlHash(md5($this->feedLink));
$feed->setAdded($this->time);
if($hasWebFavicon) {
@ -281,13 +281,13 @@ class FeedFetcherTest extends \OCA\AppFramework\Utility\TestUtility {
public function testFetchMapItemsNoFeedTitleUsesUrl(){
$this->expectCore('get_title', '');
$this->expectCore('get_link', $this->feedLink);
$this->expectCore('get_permalink', $this->feedLink);
$feed = new Feed();
$feed->setTitle($this->url);
$feed->setUrl($this->url);
$feed->setLink($this->feedLink);
$feed->setUrlHash(md5($this->url));
$feed->setUrlHash(md5($this->feedLink));
$feed->setAdded($this->time);
$feed->setFaviconLink(null);
@ -342,13 +342,13 @@ class FeedFetcherTest extends \OCA\AppFramework\Utility\TestUtility {
public function testFetchMapItemsGetFavicon() {
$this->expectCore('get_title', $this->feedTitle);
$this->expectCore('get_link', $this->feedLink);
$this->expectCore('get_permalink', $this->feedLink);
$feed = new Feed();
$feed->setTitle(html_entity_decode($this->feedTitle));
$feed->setUrl($this->url);
$feed->setLink($this->feedLink);
$feed->setUrlHash(md5($this->url));
$feed->setUrlHash(md5($this->feedLink));
$feed->setAdded($this->time);
$feed->setFaviconLink($this->webFavicon);
@ -369,13 +369,13 @@ class FeedFetcherTest extends \OCA\AppFramework\Utility\TestUtility {
public function testFetchMapItemsNoGetFavicon() {
$this->expectCore('get_title', $this->feedTitle);
$this->expectCore('get_link', $this->feedLink);
$this->expectCore('get_permalink', $this->feedLink);
$feed = new Feed();
$feed->setTitle(html_entity_decode($this->feedTitle));
$feed->setUrl($this->url);
$feed->setLink($this->feedLink);
$feed->setUrlHash(md5($this->url));
$feed->setUrlHash(md5($this->feedLink));
$feed->setAdded($this->time);
$this->core->expects($this->once())

View File

@ -63,10 +63,10 @@ class ArticleEnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
}
public function testCanHandle() {
public function testDoesNotModifiyNotMatchingResults() {
$item = new Item();
$item->setUrl('http://explosm.net/comics');
$this->assertTrue($this->testEnhancer->canHandle($item));
$item->setUrl('http://explosm.net');
$this->assertEquals($item, $this->testEnhancer->enhance($item));
}

View File

@ -1,54 +0,0 @@
<?php
/**
* ownCloud - News
*
* @author Alessandro Cosentino
* @author Bernhard Posselt
* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
* License as published by the Free Software Foundation; either
* version 3 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
*
* You should have received a copy of the GNU Affero General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
*/
namespace OCA\News\Utility\ArticleEnhancer;
use \OCA\News\Db\Item;
require_once(__DIR__ . "/../../../classloader.php");
class DefaultEnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
private $testEnhancer;
protected function setUp() {
$this->testEnhancer = new DefaultEnhancer();
}
public function testCanHandle() {
$item = new Item();
$this->assertTrue($this->testEnhancer->canHandle($item));
}
public function testEnhance() {
$item = new Item();
$this->assertEquals($item, $this->testEnhancer->enhance($item));
}
}

View File

@ -42,67 +42,49 @@ class EnhancerTest extends \OCA\AppFramework\Utility\TestUtility {
'\OCA\News\Utility\ArticleEnhancer\ArticleEnhancer')
->disableOriginalConstructor()
->getMock();
$this->articleEnhancer2 = $this->getMockBuilder(
'\OCA\News\Utility\ArticleEnhancer\ArticleEnhancer')
->disableOriginalConstructor()
->getMock();
$this->enhancer->registerEnhancer('test.com', $this->articleEnhancer);
}
public function testFetch(){
public function testEnhanceSetsCorrectHash(){
$item = new Item();
$item->setUrl('hi');
$urls = array(
'https://test.com',
'https://www.test.com',
'https://test.com/',
'http://test.com',
'http://test.com/',
'http://www.test.com'
);
for ($i=0; $i < count($urls); $i++) {
$url = $urls[$i];
$this->articleEnhancer->expects($this->at($i))
->method('enhance')
->with($this->equalTo($item))
->will($this->returnValue($item));
}
for ($i=0; $i < count($urls); $i++) {
$url = $urls[$i];
$result = $this->enhancer->enhance($item, $url);
$this->assertEquals($item, $result);
}
$this->articleEnhancer->expects($this->once())
->method('canHandle')
->with($this->equalTo($item))
->will($this->returnValue(true));
$this->enhancer->registerEnhancer($this->articleEnhancer);
$this->enhancer->enhance($item);
}
public function testMultipleFetchers(){
public function testNotMatchShouldJustReturnItem() {
$item = new Item();
$item->setUrl('hi');
$this->articleEnhancer->expects($this->once())
->method('canHandle')
->with($this->equalTo($item))
->will($this->returnValue(false));
$this->articleEnhancer2->expects($this->once())
->method('canHandle')
->with($this->equalTo($item))
->will($this->returnValue(true));
$this->enhancer->registerEnhancer($this->articleEnhancer);
$this->enhancer->registerEnhancer($this->articleEnhancer2);
$url = 'https://tests.com';
$this->articleEnhancer->expects($this->never())
->method('enhance');
$this->enhancer->enhance($item);
}
public function testMultipleFetchersOnlyOneShouldHandle(){
$item = new Item();
$item->setUrl('hi');
$return = 'zeas';
$this->articleEnhancer->expects($this->once())
->method('canHandle')
->with($this->equalTo($item))
->will($this->returnValue(true));
$this->articleEnhancer->expects($this->once())
->method('enhance')
->with($this->equalTo($item))
->will($this->returnValue($return));
$this->articleEnhancer2->expects($this->never())
->method('canHandle');
$this->enhancer->registerEnhancer($this->articleEnhancer);
$this->enhancer->registerEnhancer($this->articleEnhancer2);
$result = $this->enhancer->enhance($item);
$this->assertEquals($return, $result);
$result = $this->enhancer->enhance($item, $url);
$this->assertEquals($item, $result);
}

View File

@ -60,27 +60,23 @@ abstract class ArticleEnhancer {
}
public function canHandle($item){
return preg_match($this->articleUrlRegex, $item->getUrl()) == true;
}
public function enhance($item){
$file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout);
$dom = new \DOMDocument();
@$dom->loadHTML($file->body);
$xpath = new \DOMXpath($dom);
$xpathResult = $xpath->evaluate($this->articleXPath);
if(preg_match($this->articleUrlRegex, $item->getUrl())) {
$file = $this->fileFactory->getFile($item->getUrl(), $this->maximumTimeout);
$dom = new \DOMDocument();
@$dom->loadHTML($file->body);
$xpath = new \DOMXpath($dom);
$xpathResult = $xpath->evaluate($this->articleXPath);
// in case it wasnt a text query assume its a single
if(!is_string($xpathResult)) {
$xpathResult = $this->domToString($xpathResult);
// in case it wasnt a text query assume its a single
if(!is_string($xpathResult)) {
$xpathResult = $this->domToString($xpathResult);
}
$sanitizedResult = $this->purifier->purify($xpathResult);
$item->setBody($sanitizedResult);
}
$sanitizedResult = $this->purifier->purify($xpathResult);
$item->setBody($sanitizedResult);
return $item;
}

View File

@ -1,49 +0,0 @@
<?php
/**
* ownCloud - News
*
* @author Alessandro Cosentino
* @author Bernhard Posselt
* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
* @copyright 2012 Bernhard Posselt dev@bernhard-posselt.com
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
* License as published by the Free Software Foundation; either
* version 3 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
*
* You should have received a copy of the GNU Affero General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
*/
namespace OCA\News\Utility\ArticleEnhancer;
use \OCA\News\Utility\SimplePieFileFactory;
class DefaultEnhancer extends ArticleEnhancer {
public function __construct(){
parent::__construct(null, new SimplePieFileFactory(), null, null, null);
}
public function canHandle($item){
return true;
}
public function enhance($item){
return $item;
}
}

View File

@ -28,23 +28,36 @@ namespace OCA\News\Utility\ArticleEnhancer;
class Enhancer {
private $enhancers;
private $enhancers = array();
public function __construct(){
$this->enhancers = array();
public function registerEnhancer($feedUrl, ArticleEnhancer $enhancer){
$feedUrl = $this->removeTrailingSlash($feedUrl);
// create hashkeys for all supported protocols for quick access
$this->enhancers[$feedUrl] = $enhancer;
$this->enhancers['https://' . $feedUrl] = $enhancer;
$this->enhancers['http://' . $feedUrl] = $enhancer;
$this->enhancers['https://www.' . $feedUrl] = $enhancer;
$this->enhancers['http://www.' . $feedUrl] = $enhancer;
}
public function registerEnhancer(ArticleEnhancer $enhancer){
array_push($this->enhancers, $enhancer);
public function enhance($item, $feedUrl){
$feedUrl = $this->removeTrailingSlash($feedUrl);
if(array_key_exists($feedUrl, $this->enhancers)) {
return $this->enhancers[$feedUrl]->enhance($item);
} else {
return $item;
}
}
public function enhance($item){
foreach($this->enhancers as $enhancer){
if($enhancer->canHandle($item)){
return $enhancer->enhance($item);
}
private function removeTrailingSlash($url) {
if($url[strlen($url)-1] === '/') {
return substr($url, 0, -1);
} else {
return $url;
}
}

View File

@ -187,8 +187,8 @@ class FeedFetcher implements IFeedFetcher {
$feed->setTitle($title);
$feed->setUrl($url);
$feed->setLink($simplePieFeed->get_link());
$feed->setUrlHash(md5($url));
$feed->setLink($simplePieFeed->get_permalink());
$feed->setUrlHash(md5($feed->getLink()));
$feed->setAdded($this->time->getTime());
if ($getFavicon) {