Added charset detection from HTTP headers. Fixes #1
This commit is contained in:
38
init.php
38
init.php
@@ -46,6 +46,8 @@ class Af_Feedmod extends Plugin implements IHandler
|
||||
|
||||
function hook_article_filter($article)
|
||||
{
|
||||
global $fetch_last_content_type;
|
||||
|
||||
$json_conf = $this->host->get($this, 'json_conf');
|
||||
$owner_uid = $article['owner_uid'];
|
||||
$data = json_decode($json_conf, true);
|
||||
@@ -61,7 +63,41 @@ class Af_Feedmod extends Plugin implements IHandler
|
||||
switch ($config['type']) {
|
||||
case 'xpath':
|
||||
$doc = new DOMDocument();
|
||||
@$doc->loadHTML(fetch_file_contents($article['link']));
|
||||
|
||||
if (version_compare(VERSION, '1.7.9', '>=')) {
|
||||
$html = fetch_file_contents($article['link']);
|
||||
$content_type = $fetch_last_content_type;
|
||||
} else {
|
||||
// fallback to file_get_contents()
|
||||
$html = file_get_contents($article['link']);
|
||||
|
||||
// try to fetch charset from HTTP headers
|
||||
$headers = $http_response_header;
|
||||
$content_type = false;
|
||||
foreach ($headers as $h) {
|
||||
if (substr(strtolower($h), 0, 13) == 'content-type:') {
|
||||
$content_type = substr($h, 14);
|
||||
// don't break here to find LATEST (if redirected) entry
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!isset($config['force_charset'])) {
|
||||
$charset = false;
|
||||
if ($content_type) {
|
||||
preg_match('/charset=(\S+)/', $content_type, $matches);
|
||||
if (isset($matches[1]) && !empty($matches[1])) $charset = $matches[1];
|
||||
}
|
||||
|
||||
if ($charset) {
|
||||
$html = '<?xml encoding="' . $charset . '">' . $html;
|
||||
}
|
||||
} else {
|
||||
// use forced charset
|
||||
$html = '<?xml encoding="' . $config['force_charset'] . '">' . $html;
|
||||
}
|
||||
|
||||
@$doc->loadHTML($html);
|
||||
|
||||
if ($doc) {
|
||||
$basenode = false;
|
||||
|
Reference in New Issue
Block a user