added cleanup option for nodes fetched via xpath

2013-06-19 11:31:31 +02:00
parent ff7ae3c171
commit 64a06e0c87
2 changed files with 13 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -50,7 +50,8 @@ A configuration looks like this:
 },
 "blog.beetlebum.de": {
    "type": "xpath",
-    "xpath": "div[@class='entry-content']"
+    "xpath": "div[@class='entry-content']",
+    "cleanup": [ "header", "footer" ]
 }

 }
@ -62,6 +63,8 @@ The *array key* is part of the URL of the article links(!). You'll notice the `g

 The **xpath** value is the actual Xpath-element to fetch from the linked page. Omit the leading `//` - they will get prepended automatically.

+If **type** is set to `xpath`, an additional option **cleanup** is available. It is an array of XPath expressions (relative to the fetched node) selecting elements to remove from the fetched node. Omit the leading `//` - it will be prepended automatically.
+
 **force_charset** allows to override automatic charset detection. If it is omitted, the charset will be parsed from the HTTP headers or loadHTML() will decide on its own.


--- a/init.php
+++ b/init.php
@ -116,6 +116,15 @@ class Af_Feedmod extends Plugin implements IHandler
                        if ($entries->length > 0) $basenode = $entries->item(0);

                        if ($basenode) {
+                            // remove nodes from cleanup configuration
+                            if (isset($config['cleanup'])) {
+                                foreach ($config['cleanup'] as $cleanup) {
+                                    $nodelist = $xpath->query('//'.$cleanup, $basenode);
+                                    foreach ($nodelist as $node) {
+                                        $node->parentNode->removeChild($node);
+                                    }
+                                }
+                            }
                            $article['content'] = $doc->saveXML($basenode);
                            $article['plugin_data'] = "feedmod,$owner_uid:" . $article['plugin_data'];
                        }