1
0

Refactoring of init.php

- fetching is now done by a global function in fetch.php. This
  allows the function to be used in test/xpath.php for testing.
- besides the user configuration explained in README.md, JSON
  files from `mods` are now also loaded and used for fetching
  articles
- the JSON files are loaded only once instead of once for each
  article being processed
- test/xpath.php is now a script, which can be used to test a
  configuration from `mods`-directory

Added two mods for 4players and visions.de.

TODO: rewrite README.md
This commit is contained in:
d
2014-05-15 12:42:56 +02:00
parent f668480416
commit 5448bd752d
5 changed files with 274 additions and 207 deletions

86
fetch.php Normal file
View File

@ -0,0 +1,86 @@
<?php
/**
 * Main global function for fetching and processing articles.
 *
 * Downloads the page referenced by $article['link'], selects the first node
 * matching the configured XPath expression, removes everything matched by the
 * optional cleanup expressions and returns the markup of the selected node.
 *
 * @param array $article Article data; only the 'link' entry is read here.
 * @param array $config  Mod configuration. Keys used by this function:
 *                       - 'type'          must be 'xpath' (only supported type)
 *                       - 'xpath'         expression locating the content node;
 *                                         it is prefixed with '//' before use
 *                       - 'cleanup'       (optional) one expression or a list of
 *                                         expressions for nodes/attributes to
 *                                         remove; each is prefixed with '//'
 *                       - 'force_charset' (optional) charset to assume instead
 *                                         of the one from the Content-Type header
 *                       - 'force_unicode' (optional) when truthy and a charset
 *                                         is known, convert the page to UTF-8
 *
 * @return string|false Serialized markup of the selected node, or false when
 *                      the config type is unsupported, the page could not be
 *                      fetched or parsed, or the XPath expression matched
 *                      nothing. Callers test the result with `!$content`, so
 *                      every failure path returns false.
 */
function fetch_article($article, $config) {
    // The content type of the last fetch_file_contents() response is read from
    // this global (init.php declared it the same way before the refactoring).
    // Without the declaration the variable is just an undefined local.
    global $fetch_last_content_type;

    // the only config type supported now is 'xpath'
    if (!isset($config['type']) || $config['type'] !== 'xpath' || !isset($config['xpath']))
        return false;

    if (!isset($article['link']))
        return false;
    $link = trim($article['link']);
    if ($link === '')
        return false;

    $content_type = false;
    if (defined('VERSION') && version_compare(VERSION, '1.7.9', '>=')) {
        $html = fetch_file_contents($link);
        $content_type = $fetch_last_content_type;
    }
    else {
        // fallback to file_get_contents()
        $html = file_get_contents($link);

        // try to fetch charset from HTTP headers; the variable only exists
        // when the HTTP wrapper was used, so guard against other schemes
        $headers = isset($http_response_header) ? $http_response_header : array();
        foreach ($headers as $h) {
            if (stripos($h, 'content-type:') === 0) {
                // trim() copes with zero or several spaces after the colon
                $content_type = trim(substr($h, 13));
                // don't stop here to find LATEST (if redirected) entry
            }
        }
    }

    // a failed or empty download cannot be parsed
    if (!is_string($html) || $html === '')
        return false;

    $charset = false;
    if (isset($config['force_charset'])) {
        // use forced charset
        $charset = $config['force_charset'];
    }
    elseif ($content_type) {
        // accept quoted values and stop at ';' so that
        // 'text/html; charset="utf-8"; foo=bar' yields 'utf-8'
        if (preg_match('/charset\s*=\s*["\']?([^\s;"\']+)/i', $content_type, $matches))
            $charset = $matches[1];
    }

    if ($charset && !empty($config['force_unicode'])) {
        $converted = iconv($charset, 'utf-8', $html);
        // keep the untouched page (and its charset) if the conversion failed
        if ($converted !== false) {
            $html = $converted;
            $charset = 'utf-8';
        }
    }

    if ($charset) {
        // encoding hint for the HTML parser
        $html = '<?xml encoding="' . $charset . '">' . $html;
    }

    // collect parser warnings internally instead of silencing them with '@'
    $doc = new DOMDocument();
    $previous = libxml_use_internal_errors(true);
    $loaded = $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if (!$loaded)
        return false;

    $xpath = new DOMXPath($doc);

    // find main DIV according to config; query() yields false on a bad expression
    $entries = $xpath->query('(//'.$config['xpath'].')');
    if ($entries === false || $entries->length === 0)
        return false;
    $basenode = $entries->item(0);

    // remove nodes from cleanup configuration
    $cleanups = isset($config['cleanup']) ? (array) $config['cleanup'] : array();
    foreach ($cleanups as $cleanup) {
        // NOTE: a leading '//' is evaluated from the document root, so matches
        // outside $basenode are removed as well; they are not part of the output
        $nodelist = $xpath->query('//'.$cleanup, $basenode);
        if ($nodelist === false)
            continue; // invalid cleanup expression, skip it

        foreach ($nodelist as $node) {
            if ($node instanceof DOMAttr) {
                if ($node->ownerElement)
                    $node->ownerElement->removeAttributeNode($node);
            }
            elseif ($node->parentNode) {
                $node->parentNode->removeChild($node);
            }
        }
    }

    return $doc->saveXML($basenode);
}

312
init.php
View File

@ -1,216 +1,170 @@
<?php <?php
require_once('fetch.php');
class Af_Feedmod extends Plugin implements IHandler class Af_Feedmod extends Plugin implements IHandler
{ {
private $host; private $host;
function about() private $mods;
{ private $mods_loaded = false;
return array(
1.0, // version
'Replace feed contents by contents from the linked page', // description
'mbirth', // author
false, // is_system
);
}
function api_version() function about()
{ {
return 2; return array(
} 1.0, // version
'Replace feed contents by contents from the linked page', // description
'mbirth', // author
false, // is_system
);
}
function init($host) function api_version()
{ {
$this->host = $host; return 2;
}
$host->add_hook($host::HOOK_PREFS_TABS, $this); function init($host)
# only allowed for system plugins: $host->add_handler('pref-feedmod', '*', $this); {
$host->add_hook($host::HOOK_ARTICLE_FILTER, $this); $this->host = $host;
}
function csrf_ignore($method) $host->add_hook($host::HOOK_PREFS_TABS, $this);
{ # only allowed for system plugins: $host->add_handler('pref-feedmod', '*', $this);
$csrf_ignored = array("index", "edit"); $host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
return array_search($method, $csrf_ignored) !== false; }
}
function before($method) function csrf_ignore($method)
{ {
if ($_SESSION["uid"]) { $csrf_ignored = array("index", "edit");
return true; return array_search($method, $csrf_ignored) !== false;
} }
return false;
}
function after() function before($method)
{ {
return true; if ($_SESSION["uid"]) {
} return true;
}
return false;
}
function hook_article_filter($article) function after()
{ {
global $fetch_last_content_type; return true;
}
$json_conf = $this->host->get($this, 'json_conf'); function hook_article_filter($article)
$owner_uid = $article['owner_uid']; {
$data = json_decode($json_conf, true); global $fetch_last_content_type;
if (!is_array($data)) { $owner_uid = $article['owner_uid'];
// no valid JSON or no configuration at all
return $article;
}
foreach ($data as $urlpart=>$config) { //
if (strpos($article['link'], $urlpart) === false) continue; // skip this config if URL not matching // Load mods if they are not already loaded
if (strpos($article['plugin_data'], "feedmod,$owner_uid:") !== false) { //
// do not process an article more than once if (!$this->mods_loaded) {
if (isset($article['stored']['content'])) $article['content'] = $article['stored']['content']; //
break; // Reading mod files
} //
switch ($config['type']) { $this->mods = array();
case 'xpath': // bad (!) hardcoded path
$doc = new DOMDocument(); $mod_files = glob('plugins/af_feedmod/mods/*.json');
$link = trim($article['link']); foreach ($mod_files as $file) {
$json = file_get_contents($file);
$mod = json_decode($json, true);
if (json_last_error() != JSON_ERROR_NONE)
continue;
if (!isset($mod['match']) || !isset($mod['config']))
continue;
if (version_compare(VERSION, '1.7.9', '>=')) { $this->mods[$mod['match']] = $mod['config'];
$html = fetch_file_contents($link); }
$content_type = $fetch_last_content_type;
} else {
// fallback to file_get_contents()
$html = file_get_contents($link);
// try to fetch charset from HTTP headers //
$headers = $http_response_header; // User mods
$content_type = false; //
foreach ($headers as $h) {
if (substr(strtolower($h), 0, 13) == 'content-type:') {
$content_type = substr($h, 14);
// don't break here to find LATEST (if redirected) entry
}
}
}
$charset = false;
if (!isset($config['force_charset'])) {
if ($content_type) {
preg_match('/charset=(\S+)/', $content_type, $matches);
if (isset($matches[1]) && !empty($matches[1])) $charset = $matches[1];
}
} else {
// use forced charset
$charset = $config['force_charset'];
}
if ($charset && isset($config['force_unicode']) && $config['force_unicode']) {
$html = iconv($charset, 'utf-8', $html);
$charset = 'utf-8';
}
if ($charset) {
$html = '<?xml encoding="' . $charset . '">' . $html;
}
@$doc->loadHTML($html); $json_conf = $this->host->get($this, 'json_conf');
$user_mods = json_decode($json_conf, true);
if (is_array($user_mods))
$this->mods = array_merge($this->mods, $user_mods);
if ($doc) { $this->mods_loaded = true;
$basenode = false; }
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//'.$config['xpath'].')'); // find main DIV according to config
if ($entries->length > 0) $basenode = $entries->item(0); // article is already fetched
if (strpos($article['plugin_data'], "feedmod,$owner_uid:") !== false && isset($article['stored']['content']))
{
$article['content'] = $article['stored']['content'];
return $article;
}
if ($basenode) { foreach ($this->mods as $urlpart=>$config) {
// remove nodes from cleanup configuration if (strpos($article['link'], $urlpart) === false) continue;
if (isset($config['cleanup'])) {
if (!is_array($config['cleanup'])) {
$config['cleanup'] = array($config['cleanup']);
}
foreach ($config['cleanup'] as $cleanup) {
$nodelist = $xpath->query('//'.$cleanup, $basenode);
foreach ($nodelist as $node) {
if ($node instanceof DOMAttr) {
$node->ownerElement->removeAttributeNode($node);
}
else {
$node->parentNode->removeChild($node);
}
}
}
}
$article['content'] = $doc->saveXML($basenode);
$article['plugin_data'] = "feedmod,$owner_uid:" . $article['plugin_data'];
}
}
break;
default: $content = fetch_article($article, $config);
// unknown type or invalid config if (!$content)
continue; break;
}
break; // if we got here, we found the correct entry in $data, do not process more $article['content'] = $content;
} $article['plugin_data'] = "feedmod,$owner_uid:" . $article['plugin_data'];
}
return $article; return $article;
} }
function hook_prefs_tabs($args) function hook_prefs_tabs($args)
{ {
print '<div id="feedmodConfigTab" dojoType="dijit.layout.ContentPane" print '<div id="feedmodConfigTab" dojoType="dijit.layout.ContentPane"
href="backend.php?op=af_feedmod" href="backend.php?op=af_feedmod"
title="' . __('FeedMod') . '"></div>'; title="' . __('FeedMod') . '"></div>';
} }
function index() function index()
{ {
$pluginhost = PluginHost::getInstance(); $pluginhost = PluginHost::getInstance();
$json_conf = $pluginhost->get($this, 'json_conf'); $json_conf = $pluginhost->get($this, 'json_conf');
print "<form dojoType=\"dijit.form.Form\">"; print "<form dojoType=\"dijit.form.Form\">";
print "<script type=\"dojo/method\" event=\"onSubmit\" args=\"evt\"> print "<script type=\"dojo/method\" event=\"onSubmit\" args=\"evt\">
evt.preventDefault(); evt.preventDefault();
if (this.validate()) { if (this.validate()) {
new Ajax.Request('backend.php', { new Ajax.Request('backend.php', {
parameters: dojo.objectToQuery(this.getValues()), parameters: dojo.objectToQuery(this.getValues()),
onComplete: function(transport) { onComplete: function(transport) {
if (transport.responseText.indexOf('error')>=0) notify_error(transport.responseText); if (transport.responseText.indexOf('error')>=0) notify_error(transport.responseText);
else notify_info(transport.responseText); else notify_info(transport.responseText);
} }
}); });
//this.reset(); //this.reset();
} }
</script>"; </script>";
print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"op\" value=\"pluginhandler\">"; print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"op\" value=\"pluginhandler\">";
print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"method\" value=\"save\">"; print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"method\" value=\"save\">";
print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"plugin\" value=\"af_feedmod\">"; print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"plugin\" value=\"af_feedmod\">";
print "<table width='100%'><tr><td>"; print "<table width='100%'><tr><td>";
print "<textarea dojoType=\"dijit.form.SimpleTextarea\" name=\"json_conf\" style=\"font-size: 12px; width: 99%; height: 500px;\">$json_conf</textarea>"; print "<textarea dojoType=\"dijit.form.SimpleTextarea\" name=\"json_conf\" style=\"font-size: 12px; width: 99%; height: 500px;\">$json_conf</textarea>";
print "</td></tr></table>"; print "</td></tr></table>";
print "<p><button dojoType=\"dijit.form.Button\" type=\"submit\">".__("Save")."</button>"; print "<p><button dojoType=\"dijit.form.Button\" type=\"submit\">".__("Save")."</button>";
print "</form>"; print "</form>";
} }
function save() function save()
{ {
$json_conf = $_POST['json_conf']; $json_conf = $_POST['json_conf'];
if (is_null(json_decode($json_conf))) { if (is_null(json_decode($json_conf))) {
echo __("error: Invalid JSON!"); echo __("error: Invalid JSON!");
return false; return false;
} }
$this->host->set($this, 'json_conf', $json_conf);
echo __("Configuration saved.");
}
$this->host->set($this, 'json_conf', $json_conf);
echo __("Configuration saved.");
}
} }

12
mods/4player.de.json Normal file
View File

@ -0,0 +1,12 @@
{
"name": "4players.de",
"author": "boxdot",
"feed": "http://feeds.4players.de/Allgemein/news/-/rss.xml",
"match": "4players.de",
"config": {
"type": "xpath",
"xpath": "article",
"force_charset": "utf-8",
"cleanup": ["header", "footer", "div[contains(@class, 'social-facebook')]"]
}
}

11
mods/visions.de.json Normal file
View File

@ -0,0 +1,11 @@
{
"name": "VISIONS.de",
"author": "boxdot",
"feed": "http://rss.feedsportal.com/c/32350/f/443184/index.rss",
"match": "visions0Bde0C",
"config": {
"type": "xpath",
"xpath": "div[contains(@class, 'marginbt')]",
"force_charset": "utf-8"
}
}

View File

@ -1,33 +1,37 @@
<?php <?php
$config = array( require_once('../fetch.php');
'type' => 'xpath',
'xpath' => 'div[@itemprop="articleBody"]',
);
$article = array( if (count($argv) <= 2) {
'link' => 'http://www.der-postillon.com/2013/04/nordkoreas-armee-nach-wochenlangem.html', echo 'USAGE: php fetch.php [mod_file] [article_url]' . PHP_EOL;
'content' => 'This is the feed content', exit(1);
'plugin_data' => '',
);
$doc = new DOMDocument();
$html = file_get_contents($article['link']);
$doc->loadHTML($html);
if ($doc) {
$basenode = false;
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//'.$config['xpath'].')'); // find main DIV according to config
var_dump($entries);
if ($entries->length > 0) $basenode = $entries->item(0);
if ($basenode) {
$article['content'] = $doc->saveXML($basenode);
$article['plugin_data'] = "feedmod,$owner_uid:" . $article['plugin_data'];
}
} }
print_r($article); $mod = $argv[1];
$article_url = $argv[2];
//
// Getting json config
//
$json = file_get_contents($mod);
$data = json_decode($json, true);
echo "<pre>";
print_r($data);
echo "</pre>";
if (json_last_error() != JSON_ERROR_NONE) {
echo 'Json error' . PHP_EOL;
exit(1);
}
$config = $data['config'];
//
// Fetching article
//
$owner_uid = 100;
$article = array( 'link' => $article_url, 'plugin_data' => '' );
echo fetch_article($article, $config)['content'];