From 5a5728ef4d0b0291b039cf1942e87cc8d621d00b Mon Sep 17 00:00:00 2001 From: Markus Birth Date: Thu, 11 Apr 2013 15:56:46 +0200 Subject: [PATCH] XPath explanation in README, some new mods --- README.md | 51 +++++++++++++++++++++++++++++++ mods/der-postillon.json | 9 ++++++ mods/kojote-magazin.json | 9 ++++++ mods/n24.de.json | 2 +- mods/polizei-brandenburg-hvl.json | 9 ++++++ tests/xpath.php | 4 +-- 6 files changed, 81 insertions(+), 3 deletions(-) create mode 100644 mods/der-postillon.json create mode 100644 mods/kojote-magazin.json create mode 100644 mods/polizei-brandenburg-hvl.json diff --git a/README.md b/README.md index 147e483..d794093 100644 --- a/README.md +++ b/README.md @@ -66,3 +66,54 @@ The **xpath** value is the actual Xpath-element to fetch from the linked page. O If you get an error about "Invalid JSON!", you can use [JSONLint](http://jsonlint.com/) to locate the erroneous part. + + +XPath +----- + +### Tools + +To test your XPath expressions, you can use these Chrome extensions: + +* [XPath Helper](https://chrome.google.com/webstore/detail/xpath-helper/hgimnogjllphhhkhlmebbmlgjoejdpjl) +* [xPath Viewer](https://chrome.google.com/webstore/detail/xpath-viewer/oemacabgcknpcikelclomjajcdpbilpf) +* [xpathOnClick](https://chrome.google.com/webstore/detail/xpathonclick/ikbfbhbdjpjnalaooidkdbgjknhghhbo) + + +### Examples + +Some XPath expressions you could need (the `//` is automatically prepended and must be omitted in the FeedMod configuration): + +##### HTML5
<article> tag + +```html +
<article>…article…</article>
+``` + +```xslt +//article +``` + +##### DIV inside DIV + +```html +
<div id="content"><div class="box_content">…article…</div></div>
+``` + +```xslt +//div[@id='content']/div[@class='box_content'] +``` + +##### Multiple classes + +```html +
<div class="post-body entry-content">…article…</div>
+``` + +```xslt +//div[starts-with(@class ,'post-body')] +``` +or +```xslt +//div[contains(@class, 'entry-content')] +``` diff --git a/mods/der-postillon.json b/mods/der-postillon.json new file mode 100644 index 0000000..9f01f13 --- /dev/null +++ b/mods/der-postillon.json @@ -0,0 +1,9 @@ +{ + "name": "Der Postillon", + "feed": "http://www.der-postillon.com/feeds/posts/default", + "match": "blogspot/rkEL", + "config": { + "type": "xpath", + "xpath": "div[@itemprop='articleBody']" + } +} diff --git a/mods/kojote-magazin.json b/mods/kojote-magazin.json new file mode 100644 index 0000000..e331d51 --- /dev/null +++ b/mods/kojote-magazin.json @@ -0,0 +1,9 @@ +{ + "name": "Der Kojote", + "feed": "http://www.kojote-magazin.de/feed/rss2", + "match": "kojote-magazin.de", + "config": { + "type": "xpath", + "xpath": "div[@class='post']" + } +} \ No newline at end of file diff --git a/mods/n24.de.json b/mods/n24.de.json index 691c841..670a656 100644 --- a/mods/n24.de.json +++ b/mods/n24.de.json @@ -4,6 +4,6 @@ "match": "n24.de", "config": { "type": "xpath", - "xpath": "div[@class='news']" + "xpath": "div[@class='c2a']" } } diff --git a/mods/polizei-brandenburg-hvl.json b/mods/polizei-brandenburg-hvl.json new file mode 100644 index 0000000..91f63ed --- /dev/null +++ b/mods/polizei-brandenburg-hvl.json @@ -0,0 +1,9 @@ +{ + "name": "Polizei Havelland", + "feed": "http://www.internetwache.brandenburg.de/sixcms/list.php?page=rss_hvl", + "match": "internetwache.brandenburg.de", + "config": { + "type": "xpath", + "xpath": "div[@id='content']/div[@class='box_content']" + } +} diff --git a/tests/xpath.php b/tests/xpath.php index 1269ae5..4e422ad 100644 --- a/tests/xpath.php +++ b/tests/xpath.php @@ -2,11 +2,11 @@ $config = array( 'type' => 'xpath', - 'xpath' => 'div[@class="bacontent"]', + 'xpath' => 'div[@itemprop="articleBody"]', ); $article = array( - 'link' => 'http://www.berlin.de/polizei/presse-fahndung/archiv/383117/index.html', + 'link' => 
'http://www.der-postillon.com/2013/04/nordkoreas-armee-nach-wochenlangem.html', 'content' => 'This is the feed content', 'plugin_data' => '', );