Merge pull request #9 from rangerer/cleanup
added new cleanup feature by rangerer, thanks!
This commit is contained in:
@ -50,7 +50,8 @@ A configuration looks like this:
|
|||||||
},
|
},
|
||||||
"blog.beetlebum.de": {
|
"blog.beetlebum.de": {
|
||||||
"type": "xpath",
|
"type": "xpath",
|
||||||
"xpath": "div[@class='entry-content']"
|
"xpath": "div[@class='entry-content']",
|
||||||
|
"cleanup": [ "header", "footer" ],
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -62,6 +63,8 @@ The *array key* is part of the URL of the article links(!). You'll notice the `g
|
|||||||
|
|
||||||
The **xpath** value is the actual Xpath-element to fetch from the linked page. Omit the leading `//` - they will get prepended automatically.
|
The **xpath** value is the actual Xpath-element to fetch from the linked page. Omit the leading `//` - they will get prepended automatically.
|
||||||
|
|
||||||
|
If **type** is set to `xpath`, there is an additional option **cleanup** available. It's an array of Xpath-elements (relative to the fetched node) to remove from the fetched node. Omit the leading `//` - they will get prepended automatically.
|
||||||
|
|
||||||
**force_charset** allows overriding the automatic charset detection. If it is omitted, the charset will be parsed from the HTTP headers, or loadHTML() will decide on its own.
|
**force_charset** allows overriding the automatic charset detection. If it is omitted, the charset will be parsed from the HTTP headers, or loadHTML() will decide on its own.
|
||||||
|
|
||||||
|
|
||||||
|
9
init.php
9
init.php
@ -116,6 +116,15 @@ class Af_Feedmod extends Plugin implements IHandler
|
|||||||
if ($entries->length > 0) $basenode = $entries->item(0);
|
if ($entries->length > 0) $basenode = $entries->item(0);
|
||||||
|
|
||||||
if ($basenode) {
|
if ($basenode) {
|
||||||
|
// remove nodes from cleanup configuration
|
||||||
|
if (isset($config['cleanup'])) {
|
||||||
|
foreach ($config['cleanup'] as $cleanup) {
|
||||||
|
$nodelist = $xpath->query('//'.$cleanup, $basenode);
|
||||||
|
foreach ($nodelist as $node) {
|
||||||
|
$node->parentNode->removeChild($node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
$article['content'] = $doc->saveXML($basenode);
|
$article['content'] = $doc->saveXML($basenode);
|
||||||
$article['plugin_data'] = "feedmod,$owner_uid:" . $article['plugin_data'];
|
$article['plugin_data'] = "feedmod,$owner_uid:" . $article['plugin_data'];
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "Jojo's illustrierter Blog",
|
"name": "Jojo's illustrierter Blog",
|
||||||
"author": "Markus Birth",
|
"author": "Markus Birth",
|
||||||
"stamp": 1369500284,
|
"stamp": 1371635271,
|
||||||
"feed": "http://blog.beetlebum.de/feed/",
|
"feed": "http://blog.beetlebum.de/feed/",
|
||||||
"match": "blog.beetlebum.de",
|
"match": "blog.beetlebum.de",
|
||||||
"config": {
|
"config": {
|
||||||
"type": "xpath",
|
"type": "xpath",
|
||||||
"xpath": "div[@class='entry-content']"
|
"xpath": "div[@id='content']/article",
|
||||||
|
"cleanup": [ "header", "footer" ]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "The Oatmeal",
|
"name": "The Oatmeal",
|
||||||
"author": "Markus Birth",
|
"author": "Markus Birth",
|
||||||
"stamp": 1369500284,
|
"stamp": 1371635271,
|
||||||
"feed": "http://theoatmeal.com/feed/rss",
|
"feed": "http://theoatmeal.com/feed/rss",
|
||||||
"match": "oatmeal",
|
"match": "oatmeal",
|
||||||
"config": {
|
"config": {
|
||||||
"type": "xpath",
|
"type": "xpath",
|
||||||
"xpath": "div[@id='comic']"
|
"xpath": "div[@id='comic']",
|
||||||
|
"cleanup": [ "div[@id='content_footer2']" ]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user