1
0
mirror of https://github.com/chylex/Nextcloud-News.git synced 2025-04-12 10:15:44 +02:00
Nextcloud-News/lib/AppInfo/Application.php
DriverXX 6673cbc3d9 Reimplement full-text scraping ()
Add readability.php scraper
Fixes 
Signed-off-by: Gioele Falcetti <thegio.f@gmail.com>
2019-12-24 09:33:19 +01:00

208 lines
6.4 KiB
PHP

<?php
/**
* Nextcloud - News
*
* This file is licensed under the Affero General Public License version 3 or
* later. See the COPYING file.
*
* @author Alessandro Cosentino <cosenal@gmail.com>
* @author Bernhard Posselt <dev@bernhard-posselt.com>
* @copyright 2012 Alessandro Cosentino
* @copyright 2012-2014 Bernhard Posselt
*/
namespace OCA\News\AppInfo;
use Closure;
use FeedIo\FeedIo;
use HTMLPurifier;
use HTMLPurifier_Config;
use Favicon\Favicon;
use OCA\News\Config\FetcherConfig;
use OCA\News\Utility\PsrLogger;
use OCP\BackgroundJob\IJobList;
use OCP\IContainer;
use OCP\INavigationManager;
use OCP\IURLGenerator;
use OCP\IConfig;
use OCP\ITempManager;
use OCP\AppFramework\App;
use OCP\Files\IRootFolder;
use OCP\Files\Node;
use OCA\News\Config\AppConfig;
use OCA\News\Config\Config;
use OCA\News\Db\MapperFactory;
use OCA\News\Db\ItemMapper;
use OCA\News\Fetcher\FeedFetcher;
use OCA\News\Fetcher\Fetcher;
use OCA\News\Fetcher\YoutubeFetcher;
use OCA\News\Utility\ProxyConfigParser;
use OCA\News\Scraper\Scraper;
/**
 * Class Application
 *
 * Entry point of the News app. Its constructor registers the parameters,
 * factories and services of the app in the app's dependency-injection
 * container.
 *
 * @package OCA\News\AppInfo
 */
class Application extends App
{
    /**
     * Application constructor.
     *
     * Initialises the parent app under the id 'news' and registers every
     * container entry the app relies on (raw files, parameters, mappers,
     * configuration, logger, HTML purifier, fetchers and scrapers).
     *
     * @param array $urlParams Parameters, forwarded unchanged to the parent
     *                         constructor
     */
    public function __construct(array $urlParams = [])
    {
        parent::__construct('news', $urlParams);

        $container = $this->getContainer();

        // files: raw contents of files shipped with the app
        $container->registerService('checksums', function () {
            return file_get_contents(__DIR__ . '/checksum.json');
        });

        $container->registerService('info', function () {
            return file_get_contents(__DIR__ . '/../../appinfo/info.xml');
        });

        // parameters
        $container->registerParameter('exploreDir', __DIR__ . '/../Explore/feeds');
        $container->registerParameter('configFile', 'config.ini');

        // factories: the concrete ItemMapper is produced by the MapperFactory
        $container->registerService(ItemMapper::class, function (IContainer $c): ItemMapper {
            return $c->query(MapperFactory::class)->build();
        });

        /**
         * App config parser, loaded from the contents of the 'info' service.
         */
        $container->registerService(AppConfig::class, function (IContainer $c): AppConfig {
            $config = new AppConfig(
                $c->query(INavigationManager::class),
                $c->query(IURLGenerator::class),
                $c->query(IJobList::class)
            );
            $config->loadConfig($c->query('info'));

            return $config;
        });

        /**
         * Core
         */
        $container->registerService('LoggerParameters', function (IContainer $c): array {
            return ['app' => $c->query('AppName')];
        });

        $container->registerService('databaseType', function (IContainer $c) {
            return $c->query(IConfig::class)->getSystemValue('dbtype');
        });

        // Node for 'news/config'; the folder is created on first access
        // when it does not exist yet.
        $container->registerService('ConfigView', function (IContainer $c): Node {
            /** @var IRootFolder $fs */
            $fs = $c->query(IRootFolder::class);
            $path = 'news/config';

            if ($fs->nodeExists($path)) {
                return $fs->get($path);
            }

            return $fs->newFolder($path);
        });

        /**
         * Logger base
         */
        $container->registerService(PsrLogger::class, function (IContainer $c): PsrLogger {
            return new PsrLogger(
                $c->query('ServerContainer')->getLogger(),
                $c->query('AppName')
            );
        });

        // User config, read from the file named by the 'configFile'
        // parameter inside the 'ConfigView' node.
        $container->registerService(Config::class, function (IContainer $c): Config {
            $config = new Config(
                $c->query('ConfigView'),
                $c->query(PsrLogger::class),
                $c->query('LoggerParameters')
            );
            $config->read($c->query('configFile'), true);

            return $config;
        });

        /**
         * HTML purifier used to sanitise feed content.
         */
        $container->registerService(HTMLPurifier::class, function (IContainer $c): HTMLPurifier {
            // serializer cache lives below the instance's data directory
            $directory = $c->query(IConfig::class)->getSystemValue('datadirectory') . '/news/cache/purifier';
            if (!is_dir($directory)) {
                mkdir($directory, 0770, true);
            }

            $config = HTMLPurifier_Config::createDefault();
            $config->set('HTML.ForbiddenAttributes', 'class');
            $config->set('Cache.SerializerPath', $directory);
            $config->set('HTML.SafeIframe', true);
            // Whitelist of iframe sources. Every dot is escaped on purpose:
            // an unescaped '.' matches any character, which would also let
            // look-alike hosts such as 'player-vimeo.com' through.
            $config->set(
                'URI.SafeIframeRegexp',
                '%^https://(?:www\.)?(' .
                'youtube(?:-nocookie)?\.com/embed/|' .
                'player\.vimeo\.com/video/|' .
                'vk\.com/video_ext\.php)%'
            ); // allow YouTube, Vimeo and VK embeds

            $def = $config->getHTMLDefinition(true);
            $def->addAttribute('iframe', 'allowfullscreen', 'Bool');

            return new HTMLPurifier($config);
        });

        /**
         * Fetchers
         */
        $container->registerService(FetcherConfig::class, function (IContainer $c): FetcherConfig {
            $fConfig = new FetcherConfig();
            $fConfig->setConfig($c->query(Config::class))
                ->setProxy($c->query(IConfig::class));

            return $fConfig;
        });

        // FeedIo is built on the client provided by the FetcherConfig
        $container->registerService(FeedIo::class, function (IContainer $c): FeedIo {
            $config = $c->query(FetcherConfig::class);

            return new FeedIo($config->getClient(), $c->query(PsrLogger::class));
        });

        // Favicon cache is pointed at the temp manager's base directory
        $container->registerService(Favicon::class, function (IContainer $c): Favicon {
            $favicon = new Favicon();
            $tempManager = $c->query(ITempManager::class);
            $settings = ['dir' => $tempManager->getTempBaseDir()];
            $favicon->cache($settings);

            return $favicon;
        });

        /**
         * @noinspection PhpParamsInspection
         */
        $container->registerService(Fetcher::class, function (IContainer $c): Fetcher {
            $fetcher = new Fetcher();

            // register fetchers in order, the most generic fetcher should be
            // the last one
            $fetcher->registerFetcher($c->query(YoutubeFetcher::class));
            $fetcher->registerFetcher($c->query(FeedFetcher::class));

            return $fetcher;
        });

        /**
         * Scrapers
         */
        $container->registerService(Scraper::class, function (IContainer $c): Scraper {
            return new Scraper(
                $c->query(PsrLogger::class)
            );
        });
    }
}