Refactoring of init.php
- Fetching is now done by a global function in fetch.php. This allows the function to be reused in test/xpath.php for testing. - Besides the user configuration explained in README.md, JSON files from `mods` are now also loaded and used for fetching articles. - JSON files are loaded only once instead of once per processed article. - test/xpath.php is now a script that can be used to test a configuration from the `mods` directory. Added two mods for 4players.de and visions.de. TODO: rewrite README.md
This commit is contained in:
86
fetch.php
Normal file
86
fetch.php
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
//
|
||||||
|
// Main global function for fetching and processing articles
|
||||||
|
//
|
||||||
|
/**
 * Fetch the page an article links to and extract the configured part of it.
 *
 * @param array $article Article data; only $article['link'] is read here.
 * @param array $config  Mod configuration. Keys used:
 *                       - 'type'          must be 'xpath' (the only supported type)
 *                       - 'xpath'         expression (without the leading '//') selecting the content node
 *                       - 'cleanup'       optional expression or list of expressions whose matches are removed
 *                       - 'force_charset' optional charset that overrides the one from the HTTP headers
 *                       - 'force_unicode' optional flag; when truthy the page is converted to UTF-8
 *
 * @return string|false Serialized markup of the first node matching 'xpath', or
 *                      false when the config is unsupported, the page could not
 *                      be fetched/parsed, or nothing matched. Callers test the
 *                      result with `if (!$content)`, so every failure is false.
 */
function fetch_article($article, $config) {
    // Must be declared here: without it the variable read below is an
    // undefined local and the charset from the HTTP headers is never seen.
    // NOTE(review): presumably filled in by the host's fetch_file_contents() - confirm.
    global $fetch_last_content_type;

    // the only config type supported now is 'xpath'
    if (!isset($config['type']) || $config['type'] != 'xpath' || !isset($config['xpath']))
        return false;

    $link = trim($article['link']);
    $content_type = false;

    if (defined('VERSION') && version_compare(VERSION, '1.7.9', '>=')) {
        $html = fetch_file_contents($link);
        $content_type = $fetch_last_content_type;
    }
    else {
        // fallback to file_get_contents()
        $html = file_get_contents($link);

        // try to fetch charset from HTTP headers; the variable is only
        // populated by the HTTP wrapper, so it may not exist at all
        $headers = isset($http_response_header) ? $http_response_header : array();
        foreach ($headers as $h) {
            if (substr(strtolower($h), 0, 13) == 'content-type:') {
                $content_type = trim(substr($h, 13));
                // don't stop here to find LATEST (if redirected) entry
            }
        }
    }

    // nothing to parse if the fetch failed or the page is empty
    if (!is_string($html) || $html === '')
        return false;

    $charset = false;
    if (isset($config['force_charset'])) {
        // use forced charset
        $charset = $config['force_charset'];
    }
    elseif ($content_type && preg_match('/charset=(\S+)/i', $content_type, $matches)) {
        // strip quoting and a trailing parameter separator
        $charset = trim($matches[1], "\"';");
        if ($charset === '') $charset = false;
    }

    if ($charset && !empty($config['force_unicode'])) {
        $converted = iconv($charset, 'utf-8', $html);
        // keep the original bytes and charset if the conversion failed
        if ($converted !== false) {
            $html = $converted;
            $charset = 'utf-8';
        }
    }

    if ($charset) {
        // hint the encoding to the HTML parser
        $html = '<?xml encoding="' . $charset . '">' . $html;
    }

    // collect parser complaints about real-world markup instead of emitting warnings
    $doc = new DOMDocument();
    $previous = libxml_use_internal_errors(true);
    $loaded = $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$loaded)
        return false;

    $xpath = new DOMXPath($doc);
    $entries = $xpath->query('(//'.$config['xpath'].')'); // find main node according to config

    // query() yields false for an invalid expression
    if ($entries === false || $entries->length == 0)
        return false;

    $basenode = $entries->item(0);

    // remove nodes from cleanup configuration
    if (isset($config['cleanup'])) {
        $cleanups = is_array($config['cleanup']) ? $config['cleanup'] : array($config['cleanup']);

        foreach ($cleanups as $cleanup) {
            // the leading '//' makes the expression document-wide even though a
            // context node is passed; only $basenode is serialized afterwards
            $nodelist = $xpath->query('//'.$cleanup, $basenode);
            if ($nodelist === false) continue;

            foreach ($nodelist as $node) {
                if ($node instanceof DOMAttr) {
                    if ($node->ownerElement) $node->ownerElement->removeAttributeNode($node);
                }
                elseif ($node->parentNode) {
                    $node->parentNode->removeChild($node);
                }
            }
        }
    }

    return $doc->saveXML($basenode);
}
|
312
init.php
312
init.php
@ -1,216 +1,170 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
require_once('fetch.php');
|
||||||
|
|
||||||
class Af_Feedmod extends Plugin implements IHandler
|
class Af_Feedmod extends Plugin implements IHandler
|
||||||
{
|
{
|
||||||
private $host;
|
private $host;
|
||||||
|
|
||||||
function about()
|
private $mods;
|
||||||
{
|
private $mods_loaded = false;
|
||||||
return array(
|
|
||||||
1.0, // version
|
|
||||||
'Replace feed contents by contents from the linked page', // description
|
|
||||||
'mbirth', // author
|
|
||||||
false, // is_system
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function api_version()
|
function about()
|
||||||
{
|
{
|
||||||
return 2;
|
return array(
|
||||||
}
|
1.0, // version
|
||||||
|
'Replace feed contents by contents from the linked page', // description
|
||||||
|
'mbirth', // author
|
||||||
|
false, // is_system
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
function init($host)
|
function api_version()
|
||||||
{
|
{
|
||||||
$this->host = $host;
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
$host->add_hook($host::HOOK_PREFS_TABS, $this);
|
function init($host)
|
||||||
# only allowed for system plugins: $host->add_handler('pref-feedmod', '*', $this);
|
{
|
||||||
$host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
|
$this->host = $host;
|
||||||
}
|
|
||||||
|
|
||||||
function csrf_ignore($method)
|
$host->add_hook($host::HOOK_PREFS_TABS, $this);
|
||||||
{
|
# only allowed for system plugins: $host->add_handler('pref-feedmod', '*', $this);
|
||||||
$csrf_ignored = array("index", "edit");
|
$host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
|
||||||
return array_search($method, $csrf_ignored) !== false;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
function before($method)
|
function csrf_ignore($method)
|
||||||
{
|
{
|
||||||
if ($_SESSION["uid"]) {
|
$csrf_ignored = array("index", "edit");
|
||||||
return true;
|
return array_search($method, $csrf_ignored) !== false;
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
function after()
|
function before($method)
|
||||||
{
|
{
|
||||||
return true;
|
if ($_SESSION["uid"]) {
|
||||||
}
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
function hook_article_filter($article)
|
function after()
|
||||||
{
|
{
|
||||||
global $fetch_last_content_type;
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
$json_conf = $this->host->get($this, 'json_conf');
|
function hook_article_filter($article)
|
||||||
$owner_uid = $article['owner_uid'];
|
{
|
||||||
$data = json_decode($json_conf, true);
|
global $fetch_last_content_type;
|
||||||
|
|
||||||
if (!is_array($data)) {
|
$owner_uid = $article['owner_uid'];
|
||||||
// no valid JSON or no configuration at all
|
|
||||||
return $article;
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach ($data as $urlpart=>$config) {
|
//
|
||||||
if (strpos($article['link'], $urlpart) === false) continue; // skip this config if URL not matching
|
// Load mods if they are not already loaded
|
||||||
if (strpos($article['plugin_data'], "feedmod,$owner_uid:") !== false) {
|
//
|
||||||
// do not process an article more than once
|
if (!$this->mods_loaded) {
|
||||||
if (isset($article['stored']['content'])) $article['content'] = $article['stored']['content'];
|
//
|
||||||
break;
|
// Reading mod files
|
||||||
}
|
//
|
||||||
|
|
||||||
switch ($config['type']) {
|
$this->mods = array();
|
||||||
case 'xpath':
|
// bad (!) hardcoded path
|
||||||
$doc = new DOMDocument();
|
$mod_files = glob('plugins/af_feedmod/mods/*.json');
|
||||||
$link = trim($article['link']);
|
foreach ($mod_files as $file) {
|
||||||
|
$json = file_get_contents($file);
|
||||||
|
$mod = json_decode($json, true);
|
||||||
|
if (json_last_error() != JSON_ERROR_NONE)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!isset($mod['match']) || !isset($mod['config']))
|
||||||
|
continue;
|
||||||
|
|
||||||
if (version_compare(VERSION, '1.7.9', '>=')) {
|
$this->mods[$mod['match']] = $mod['config'];
|
||||||
$html = fetch_file_contents($link);
|
}
|
||||||
$content_type = $fetch_last_content_type;
|
|
||||||
} else {
|
|
||||||
// fallback to file_get_contents()
|
|
||||||
$html = file_get_contents($link);
|
|
||||||
|
|
||||||
// try to fetch charset from HTTP headers
|
//
|
||||||
$headers = $http_response_header;
|
// User mods
|
||||||
$content_type = false;
|
//
|
||||||
foreach ($headers as $h) {
|
|
||||||
if (substr(strtolower($h), 0, 13) == 'content-type:') {
|
|
||||||
$content_type = substr($h, 14);
|
|
||||||
// don't break here to find LATEST (if redirected) entry
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$charset = false;
|
|
||||||
if (!isset($config['force_charset'])) {
|
|
||||||
if ($content_type) {
|
|
||||||
preg_match('/charset=(\S+)/', $content_type, $matches);
|
|
||||||
if (isset($matches[1]) && !empty($matches[1])) $charset = $matches[1];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// use forced charset
|
|
||||||
$charset = $config['force_charset'];
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($charset && isset($config['force_unicode']) && $config['force_unicode']) {
|
|
||||||
$html = iconv($charset, 'utf-8', $html);
|
|
||||||
$charset = 'utf-8';
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($charset) {
|
|
||||||
$html = '<?xml encoding="' . $charset . '">' . $html;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@$doc->loadHTML($html);
|
$json_conf = $this->host->get($this, 'json_conf');
|
||||||
|
$user_mods = json_decode($json_conf, true);
|
||||||
|
if (is_array($user_mods))
|
||||||
|
$this->mods = array_merge($this->mods, $user_mods);
|
||||||
|
|
||||||
if ($doc) {
|
$this->mods_loaded = true;
|
||||||
$basenode = false;
|
}
|
||||||
$xpath = new DOMXPath($doc);
|
|
||||||
$entries = $xpath->query('(//'.$config['xpath'].')'); // find main DIV according to config
|
|
||||||
|
|
||||||
if ($entries->length > 0) $basenode = $entries->item(0);
|
// article is already fetched
|
||||||
|
if (strpos($article['plugin_data'], "feedmod,$owner_uid:") !== false && isset($article['stored']['content']))
|
||||||
|
{
|
||||||
|
$article['content'] = $article['stored']['content'];
|
||||||
|
return $article;
|
||||||
|
}
|
||||||
|
|
||||||
if ($basenode) {
|
foreach ($this->mods as $urlpart=>$config) {
|
||||||
// remove nodes from cleanup configuration
|
if (strpos($article['link'], $urlpart) === false) continue;
|
||||||
if (isset($config['cleanup'])) {
|
|
||||||
if (!is_array($config['cleanup'])) {
|
|
||||||
$config['cleanup'] = array($config['cleanup']);
|
|
||||||
}
|
|
||||||
foreach ($config['cleanup'] as $cleanup) {
|
|
||||||
$nodelist = $xpath->query('//'.$cleanup, $basenode);
|
|
||||||
foreach ($nodelist as $node) {
|
|
||||||
if ($node instanceof DOMAttr) {
|
|
||||||
$node->ownerElement->removeAttributeNode($node);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
$node->parentNode->removeChild($node);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$article['content'] = $doc->saveXML($basenode);
|
|
||||||
$article['plugin_data'] = "feedmod,$owner_uid:" . $article['plugin_data'];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
$content = fetch_article($article, $config);
|
||||||
// unknown type or invalid config
|
if (!$content)
|
||||||
continue;
|
break;
|
||||||
}
|
|
||||||
|
|
||||||
break; // if we got here, we found the correct entry in $data, do not process more
|
$article['content'] = $content;
|
||||||
}
|
$article['plugin_data'] = "feedmod,$owner_uid:" . $article['plugin_data'];
|
||||||
|
}
|
||||||
|
|
||||||
return $article;
|
return $article;
|
||||||
}
|
}
|
||||||
|
|
||||||
function hook_prefs_tabs($args)
|
function hook_prefs_tabs($args)
|
||||||
{
|
{
|
||||||
print '<div id="feedmodConfigTab" dojoType="dijit.layout.ContentPane"
|
print '<div id="feedmodConfigTab" dojoType="dijit.layout.ContentPane"
|
||||||
href="backend.php?op=af_feedmod"
|
href="backend.php?op=af_feedmod"
|
||||||
title="' . __('FeedMod') . '"></div>';
|
title="' . __('FeedMod') . '"></div>';
|
||||||
}
|
}
|
||||||
|
|
||||||
function index()
|
function index()
|
||||||
{
|
{
|
||||||
$pluginhost = PluginHost::getInstance();
|
$pluginhost = PluginHost::getInstance();
|
||||||
$json_conf = $pluginhost->get($this, 'json_conf');
|
$json_conf = $pluginhost->get($this, 'json_conf');
|
||||||
|
|
||||||
print "<form dojoType=\"dijit.form.Form\">";
|
print "<form dojoType=\"dijit.form.Form\">";
|
||||||
|
|
||||||
print "<script type=\"dojo/method\" event=\"onSubmit\" args=\"evt\">
|
print "<script type=\"dojo/method\" event=\"onSubmit\" args=\"evt\">
|
||||||
evt.preventDefault();
|
evt.preventDefault();
|
||||||
if (this.validate()) {
|
if (this.validate()) {
|
||||||
new Ajax.Request('backend.php', {
|
new Ajax.Request('backend.php', {
|
||||||
parameters: dojo.objectToQuery(this.getValues()),
|
parameters: dojo.objectToQuery(this.getValues()),
|
||||||
onComplete: function(transport) {
|
onComplete: function(transport) {
|
||||||
if (transport.responseText.indexOf('error')>=0) notify_error(transport.responseText);
|
if (transport.responseText.indexOf('error')>=0) notify_error(transport.responseText);
|
||||||
else notify_info(transport.responseText);
|
else notify_info(transport.responseText);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
//this.reset();
|
//this.reset();
|
||||||
}
|
}
|
||||||
</script>";
|
</script>";
|
||||||
|
|
||||||
print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"op\" value=\"pluginhandler\">";
|
print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"op\" value=\"pluginhandler\">";
|
||||||
print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"method\" value=\"save\">";
|
print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"method\" value=\"save\">";
|
||||||
print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"plugin\" value=\"af_feedmod\">";
|
print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"plugin\" value=\"af_feedmod\">";
|
||||||
|
|
||||||
print "<table width='100%'><tr><td>";
|
print "<table width='100%'><tr><td>";
|
||||||
print "<textarea dojoType=\"dijit.form.SimpleTextarea\" name=\"json_conf\" style=\"font-size: 12px; width: 99%; height: 500px;\">$json_conf</textarea>";
|
print "<textarea dojoType=\"dijit.form.SimpleTextarea\" name=\"json_conf\" style=\"font-size: 12px; width: 99%; height: 500px;\">$json_conf</textarea>";
|
||||||
print "</td></tr></table>";
|
print "</td></tr></table>";
|
||||||
|
|
||||||
print "<p><button dojoType=\"dijit.form.Button\" type=\"submit\">".__("Save")."</button>";
|
print "<p><button dojoType=\"dijit.form.Button\" type=\"submit\">".__("Save")."</button>";
|
||||||
|
|
||||||
print "</form>";
|
print "</form>";
|
||||||
}
|
}
|
||||||
|
|
||||||
function save()
|
function save()
|
||||||
{
|
{
|
||||||
$json_conf = $_POST['json_conf'];
|
$json_conf = $_POST['json_conf'];
|
||||||
|
|
||||||
if (is_null(json_decode($json_conf))) {
|
if (is_null(json_decode($json_conf))) {
|
||||||
echo __("error: Invalid JSON!");
|
echo __("error: Invalid JSON!");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->host->set($this, 'json_conf', $json_conf);
|
|
||||||
echo __("Configuration saved.");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
$this->host->set($this, 'json_conf', $json_conf);
|
||||||
|
echo __("Configuration saved.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
12
mods/4player.de.json
Normal file
12
mods/4player.de.json
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"name": "4players.de",
|
||||||
|
"author": "boxdot",
|
||||||
|
"feed": "http://feeds.4players.de/Allgemein/news/-/rss.xml",
|
||||||
|
"match": "4players.de",
|
||||||
|
"config": {
|
||||||
|
"type": "xpath",
|
||||||
|
"xpath": "article",
|
||||||
|
"force_charset": "utf-8",
|
||||||
|
"cleanup": ["header", "footer", "div[contains(@class, 'social-facebook')]"]
|
||||||
|
}
|
||||||
|
}
|
11
mods/visions.de.json
Normal file
11
mods/visions.de.json
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"name": "VISIONS.de",
|
||||||
|
"author": "boxdot",
|
||||||
|
"feed": "http://rss.feedsportal.com/c/32350/f/443184/index.rss",
|
||||||
|
"match": "visions0Bde0C",
|
||||||
|
"config": {
|
||||||
|
"type": "xpath",
|
||||||
|
"xpath": "div[contains(@class, 'marginbt')]",
|
||||||
|
"force_charset": "utf-8"
|
||||||
|
}
|
||||||
|
}
|
@ -1,33 +1,37 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
$config = array(
|
require_once('../fetch.php');
|
||||||
'type' => 'xpath',
|
|
||||||
'xpath' => 'div[@itemprop="articleBody"]',
|
|
||||||
);
|
|
||||||
|
|
||||||
$article = array(
|
if (count($argv) <= 2) {
|
||||||
'link' => 'http://www.der-postillon.com/2013/04/nordkoreas-armee-nach-wochenlangem.html',
|
echo 'USAGE: php fetch.php [mod_file] [article_url]' . PHP_EOL;
|
||||||
'content' => 'This is the feed content',
|
exit(1);
|
||||||
'plugin_data' => '',
|
|
||||||
);
|
|
||||||
|
|
||||||
$doc = new DOMDocument();
|
|
||||||
$html = file_get_contents($article['link']);
|
|
||||||
$doc->loadHTML($html);
|
|
||||||
|
|
||||||
if ($doc) {
|
|
||||||
$basenode = false;
|
|
||||||
$xpath = new DOMXPath($doc);
|
|
||||||
$entries = $xpath->query('(//'.$config['xpath'].')'); // find main DIV according to config
|
|
||||||
|
|
||||||
var_dump($entries);
|
|
||||||
|
|
||||||
if ($entries->length > 0) $basenode = $entries->item(0);
|
|
||||||
|
|
||||||
if ($basenode) {
|
|
||||||
$article['content'] = $doc->saveXML($basenode);
|
|
||||||
$article['plugin_data'] = "feedmod,$owner_uid:" . $article['plugin_data'];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
print_r($article);
|
$mod = $argv[1];
|
||||||
|
$article_url = $argv[2];
|
||||||
|
|
||||||
|
//
|
||||||
|
// Getting json config
|
||||||
|
//
|
||||||
|
|
||||||
|
$json = file_get_contents($mod);
|
||||||
|
$data = json_decode($json, true);
|
||||||
|
|
||||||
|
echo "<pre>";
|
||||||
|
print_r($data);
|
||||||
|
echo "</pre>";
|
||||||
|
|
||||||
|
if (json_last_error() != JSON_ERROR_NONE) {
|
||||||
|
echo 'Json error' . PHP_EOL;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
$config = $data['config'];
|
||||||
|
|
||||||
|
//
|
||||||
|
// Fetching article
|
||||||
|
//
|
||||||
|
|
||||||
|
$owner_uid = 100;
|
||||||
|
$article = array( 'link' => $article_url, 'plugin_data' => '' );
|
||||||
|
echo fetch_article($article, $config)['content'];
|
||||||
|
Reference in New Issue
Block a user