From 64a06e0c873b4f6429e130c66056a39d72c5c575 Mon Sep 17 00:00:00 2001
From: Roland Angerer <dev@rangerer.at>
Date: Wed, 19 Jun 2013 11:31:31 +0200
Subject: [PATCH 1/3] added cleanup option for nodes fetched via xpath

---
 README.md | 5 ++++-
 init.php  | 9 +++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ed7365b..7c529ab 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,8 @@ A configuration looks like this:
 },
 "blog.beetlebum.de": {
     "type": "xpath",
-    "xpath": "div[@class='entry-content']"
+    "xpath": "div[@class='entry-content']",
+    "cleanup": [ "header", "footer" ]
 }
 
 }
@@ -62,6 +63,8 @@ The *array key* is part of the URL of the article links(!). You'll notice the `g
 
 The **xpath** value is the actual Xpath-element to fetch from the linked page. Omit the leading `//` - they will get prepended automatically.
 
+If **type** was set to `xpath' there is an additional option **cleanup** available. Its an array of Xpath-elements (relative to the fetched node) to remove from the fetched node. Omit the leading `//` - they will get prepended automatically.
+
 **force_charset** allows to override automatic charset detection. If it is omitted, the charset will be parsed from the HTTP headers or loadHTML() will decide on its own.
 
 
diff --git a/init.php b/init.php
index 3b65ce3..e99744a 100644
--- a/init.php
+++ b/init.php
@@ -116,6 +116,15 @@ class Af_Feedmod extends Plugin implements IHandler
                         if ($entries->length > 0) $basenode = $entries->item(0);
 
                         if ($basenode) {
+                            // remove nodes matched by the cleanup expressions; './/' keeps each query relative to $basenode
+                            if (isset($config['cleanup'])) {
+                                foreach ($config['cleanup'] as $cleanup) {
+                                    $nodelist = $xpath->query('.//'.$cleanup, $basenode);
+                                    foreach ($nodelist as $node) {
+                                        $node->parentNode->removeChild($node);
+                                    }
+                                }
+                            }
                             $article['content'] = $doc->saveXML($basenode);
                             $article['plugin_data'] = "feedmod,$owner_uid:" . $article['plugin_data'];
                         }

From cb589fdd7905a27118c3251ea29f6c4cdca9f571 Mon Sep 17 00:00:00 2001
From: Roland Angerer <dev@rangerer.at>
Date: Wed, 19 Jun 2013 11:53:40 +0200
Subject: [PATCH 2/3] added cleanup config to the following mods - jojosblog -
 theoatmeal

---
 mods/jojosblog.json  | 5 +++--
 mods/theoatmeal.json | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/mods/jojosblog.json b/mods/jojosblog.json
index dc8f6fc..a81029a 100644
--- a/mods/jojosblog.json
+++ b/mods/jojosblog.json
@@ -1,11 +1,12 @@
 {
     "name": "Jojo's illustrierter Blog",
     "author": "Markus Birth",
-    "stamp": 1369500284,
+    "stamp": 1371635271,
     "feed": "http://blog.beetlebum.de/feed/",
     "match": "blog.beetlebum.de",
     "config": {
         "type": "xpath",
-        "xpath": "div[@class='entry-content']"
+        "xpath": "div[@id='content']/article",
+        "cleanup": [ "header", "footer" ]
     }
 }
diff --git a/mods/theoatmeal.json b/mods/theoatmeal.json
index 628af3c..418ce10 100644
--- a/mods/theoatmeal.json
+++ b/mods/theoatmeal.json
@@ -1,11 +1,12 @@
 {
     "name": "The Oatmeal",
     "author": "Markus Birth",
-    "stamp": 1369500284,
+    "stamp": 1371635271,
     "feed": "http://theoatmeal.com/feed/rss",
     "match": "oatmeal",
     "config": {
         "type": "xpath",
-        "xpath": "div[@id='comic']"
+        "xpath": "div[@id='comic']",
+        "cleanup": [ "div[@id='content_footer2']" ]
     }
 }

From 9c8da97a1ab8eca80c8b579816e4d287cff6f840 Mon Sep 17 00:00:00 2001
From: Roland Angerer <dev@rangerer.at>
Date: Wed, 19 Jun 2013 16:47:02 +0200
Subject: [PATCH 3/3] bugfix: corrected README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7c529ab..de1a410 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ The *array key* is part of the URL of the article links(!). You'll notice the `g
 
 The **xpath** value is the actual Xpath-element to fetch from the linked page. Omit the leading `//` - they will get prepended automatically.
 
-If **type** was set to `xpath' there is an additional option **cleanup** available. Its an array of Xpath-elements (relative to the fetched node) to remove from the fetched node. Omit the leading `//` - they will get prepended automatically.
+If **type** is set to `xpath`, there is an additional option **cleanup** available. It's an array of Xpath-elements (relative to the fetched node) to remove from the fetched node. Omit the leading `//` - they will get prepended automatically.
 
 **force_charset** allows to override automatic charset detection. If it is omitted, the charset will be parsed from the HTTP headers or loadHTML() will decide on its own.