diff options
author | Thedro Neely <thedroneely@gmail.com> | 2019-03-25 21:27:46 -0400 |
---|---|---|
committer | Thedro Neely <thedroneely@gmail.com> | 2019-03-25 21:27:46 -0400 |
commit | 6f768d766358ac7de862af3082b325e814e0ba45 (patch) | |
tree | ddaaff83a4071cfd7d13024cc1565a9de49ef76b /app/model | |
parent | c383d5f68134e49ccdb9448611c8ba3b73977add (diff) | |
download | thedroneely.com-6f768d766358ac7de862af3082b325e814e0ba45.tar.gz thedroneely.com-6f768d766358ac7de862af3082b325e814e0ba45.tar.bz2 thedroneely.com-6f768d766358ac7de862af3082b325e814e0ba45.zip |
app/model/HTMLExtract: Add HTML Extractor
Extract posts and projects and add to home page
Diffstat (limited to 'app/model')
-rw-r--r-- | app/model/HTMLExtract.php | 64 |
1 files changed, 64 insertions, 0 deletions
<?php

// File: app/model/HTMLExtract.php (new file in this commit, blob 5c3f727).

/**
 * Extracts fragments of an HTML document by tag name, optionally filtered
 * by a single attribute value.
 *
 * Usage: (new DOMExtract())->setSource($pathOrUrl)->getInnerHTML('div', 'id=posts');
 */
class DOMExtract extends DOMDocument
{
    /** @var string|false|null Raw HTML as returned by file_get_contents(), or null before setSource(). */
    private $source;

    /**
     * Initialises the underlying DOMDocument and configures lenient parsing.
     */
    public function __construct()
    {
        // DOMDocument must be constructed before its properties can be
        // written; without this call the assignments below hit an
        // uninitialised document.
        parent::__construct();

        // Collect libxml parse errors internally instead of emitting PHP
        // warnings for every quirk of real-world HTML.
        libxml_use_internal_errors(true);

        $this->preserveWhiteSpace = false;
        $this->strictErrorChecking = false;
        $this->formatOutput = true;
    }

    /**
     * Reads the HTML to extract from.
     *
     * @param string $source Anything file_get_contents() accepts (path or URL).
     *
     * @return $this
     */
    public function setSource($source)
    {
        // A failed read stores false; getInnerHTML() rejects that with its
        // "missing source" exception.
        $this->source = file_get_contents($source);

        return $this;
    }

    /**
     * Concatenates the content of every <$tag> element, optionally restricted
     * to elements whose attribute equals a given value.
     *
     * @param string      $tag       Tag name to look for.
     * @param string|null $id        Optional filter written as "attribute=value".
     *                               Only the first "=" separates name from value,
     *                               so the value may itself contain "=". A filter
     *                               with no "=" is compared against the empty
     *                               string.
     * @param bool        $nodeValue When truthy return the trimmed text content,
     *                               otherwise the serialised inner HTML.
     *
     * @return string|null Concatenated content, or null when no element matched.
     *
     * @throws Exception When no source has been loaded with setSource().
     */
    public function getInnerHTML($tag, $id = null, $nodeValue = false)
    {
        if (!is_string($this->source) || $this->source === '') {
            throw new Exception('Error: Missing $this->source, use setSource() first');
        }

        $this->loadHTML($this->source);
        // Internal error buffering is on; drop the collected parse errors so
        // they do not pile up across repeated calls.
        libxml_clear_errors();

        // Parse the filter once, outside the loop.
        $attrName = null;
        $attrValue = '';
        if ($id !== null) {
            $parts = array_pad(explode('=', $id, 2), 2, '');
            $attrName = $parts[0];
            $attrValue = $parts[1];
        }

        $html = null;
        foreach ($this->getElementsByTagName($tag) as $element) {
            // Strict comparison: loose == treats numeric-looking strings
            // such as "1e3" and "1000" as equal.
            if ($attrName !== null && $element->getAttribute($attrName) !== $attrValue) {
                continue;
            }
            $html .= $this->extractContent($element, $nodeValue);
        }

        return $html;
    }

    /**
     * Serialises the children of a node to an HTML string.
     *
     * @param DOMNode $document Node whose children are serialised.
     *
     * @return string Concatenation of each child's trimmed HTML.
     */
    protected function innerHTML($document)
    {
        $html = "";
        foreach ($document->childNodes as $child) {
            // Each child is imported into a scratch document so saveHTML()
            // emits only that child rather than the whole page.
            $tmp = new DOMDocument();
            $tmp->appendChild($tmp->importNode($child, true));
            $html .= trim($tmp->saveHTML());
        }

        return $html;
    }

    /**
     * Returns either the trimmed text or the inner HTML of one element.
     *
     * @param DOMNode $element  Matched element.
     * @param bool    $asText   Truthy for text content, falsy for inner HTML.
     *
     * @return string
     */
    private function extractContent($element, $asText)
    {
        if ($asText) {
            return trim($element->nodeValue);
        }

        return $this->innerHTML($element);
    }
}