diff options
| author | 2012-11-07 22:30:21 +0100 | |
|---|---|---|
| committer | 2012-11-07 22:30:21 +0100 | |
| commit | 403923d3d12ed8316fd56ab758547b0352fb0ceb (patch) | |
| tree | 6c40d0234ee68c63b35a40f4d6f5e10f996b3879 /lib/lib_rss.php | |
| parent | 85cc23fb4fde71809a6e510dabb66a7e4d612233 (diff) | |
Changement de library pour parser les sites dont on doit récupérer le contenu
Diffstat (limited to 'lib/lib_rss.php')
| -rw-r--r-- | lib/lib_rss.php | 34 |
1 file changed, 15 insertions, 19 deletions
```diff
diff --git a/lib/lib_rss.php b/lib/lib_rss.php
index f0a65fa49..e957a11fb 100644
--- a/lib/lib_rss.php
+++ b/lib/lib_rss.php
@@ -181,28 +181,24 @@ function get_path ($url) {
 }
 
 
-/* supprime les trucs inutiles des balises html */
-function good_bye_extra ($element) {
-	$element->style = null;
-	$element->class = null;
-	$element->id = null;
-	$element->onload = null;
-}
 /* permet de récupérer le contenu d'un article pour un flux qui n'est pas complet */
 function get_content_by_parsing ($url, $path) {
-	$html = new simple_html_dom ();
-	$html->set_callback ('good_bye_extra');
-	$ok = $html->load_file ($url);
+	$html = file_get_contents ($url);
-	if ($ok !== false) {
-		$content = $html->find ($path, 0);
-		$html->clear ();
-
-		if ($content) {
-			return $content->__toString ();
-		} else {
-			throw new Exception ();
-		}
+	if ($html) {
+		$doc = phpQuery::newDocument ($html);
+		$content = $doc->find ($path);
+		$content->find ('*')->removeAttr ('style')
+			->removeAttr ('id')
+			->removeAttr ('class')
+			->removeAttr ('onload')
+			->removeAttr ('target');
+		$content->removeAttr ('style')
+			->removeAttr ('id')
+			->removeAttr ('class')
+			->removeAttr ('onload')
+			->removeAttr ('target');
+		return $content->__toString ();
 	} else {
 		throw new Exception ();
 	}
 }
```
