diff --git a/README.md b/README.md index ed7365b..de1a410 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,8 @@ A configuration looks like this: }, "blog.beetlebum.de": { "type": "xpath", - "xpath": "div[@class='entry-content']" + "xpath": "div[@class='entry-content']", + "cleanup": [ "header", "footer" ] } } @@ -62,6 +63,8 @@ The *array key* is part of the URL of the article links(!). You'll notice the `g The **xpath** value is the actual Xpath-element to fetch from the linked page. Omit the leading `//` - they will get prepended automatically. +If **type** is set to `xpath`, there is an additional option **cleanup** available. It's an array of Xpath-elements (relative to the fetched node) to remove from the fetched node. Omit the leading `//` - they will get prepended automatically. + **force_charset** allows to override automatic charset detection. If it is omitted, the charset will be parsed from the HTTP headers or loadHTML() will decide on its own. diff --git a/init.php b/init.php index 3b65ce3..e99744a 100644 --- a/init.php +++ b/init.php @@ -116,6 +116,15 @@ class Af_Feedmod extends Plugin implements IHandler if ($entries->length > 0) $basenode = $entries->item(0); if ($basenode) { + // remove nodes from cleanup configuration + if (isset($config['cleanup'])) { + foreach ($config['cleanup'] as $cleanup) { + $nodelist = $xpath->query('//'.$cleanup, $basenode); + foreach ($nodelist as $node) { + $node->parentNode->removeChild($node); + } + } + } $article['content'] = $doc->saveXML($basenode); $article['plugin_data'] = "feedmod,$owner_uid:" . 
$article['plugin_data']; } diff --git a/mods/jojosblog.json b/mods/jojosblog.json index dc8f6fc..a81029a 100644 --- a/mods/jojosblog.json +++ b/mods/jojosblog.json @@ -1,11 +1,12 @@ { "name": "Jojo's illustrierter Blog", "author": "Markus Birth", - "stamp": 1369500284, + "stamp": 1371635271, "feed": "http://blog.beetlebum.de/feed/", "match": "blog.beetlebum.de", "config": { "type": "xpath", - "xpath": "div[@class='entry-content']" + "xpath": "div[@id='content']/article", + "cleanup": [ "header", "footer" ] } } diff --git a/mods/theoatmeal.json b/mods/theoatmeal.json index 628af3c..418ce10 100644 --- a/mods/theoatmeal.json +++ b/mods/theoatmeal.json @@ -1,11 +1,12 @@ { "name": "The Oatmeal", "author": "Markus Birth", - "stamp": 1369500284, + "stamp": 1371635271, "feed": "http://theoatmeal.com/feed/rss", "match": "oatmeal", "config": { "type": "xpath", - "xpath": "div[@id='comic']" + "xpath": "div[@id='comic']", + "cleanup": [ "div[@id='content_footer2']" ] } }