<?php

/**
 * HTMLExtract: pulls the inner HTML or the text of selected elements out of
 * an HTML source read with file_get_contents().
 *
 * Provenance: recovered from commit 6f768d766358ac7de862af3082b325e814e0ba45
 * (Thedro Neely, Mon, 25 Mar 2019 21:27:46 -0400), which created
 * app/model/HTMLExtract.php with the message:
 *
 *     app/model/HTMLExtract: Add HTML Extractor
 *
 *     Extract posts and projects and add to home page
 *
 * NOTE(review): the recovered text was truncated between the hunk header and
 * "preserveWhiteSpace", so the opening tag, the class declaration, the
 * $source property and the constructor signature below are reconstructed
 * from how the surviving methods use $this. Any namespace/use lines, the
 * property's real visibility, and any extra constructor statements are
 * unknown -- confirm against the original commit.
 */
class HTMLExtract extends DOMDocument
{
    /** @var string|null Raw HTML stored by setSource(); null until it is called. */
    protected $source;

    /**
     * Configure the underlying DOMDocument for lenient parsing and
     * formatted output.
     */
    public function __construct()
    {
        // Initialise the DOMDocument itself before touching its properties.
        parent::__construct();

        $this->preserveWhiteSpace = false;
        $this->strictErrorChecking = false;
        $this->formatOutput = true;
    }

    /**
     * Read the HTML to extract from.
     *
     * @param string $source Anything file_get_contents() accepts.
     *
     * @return $this
     *
     * @throws RuntimeException When the source cannot be read.
     */
    public function setSource($source)
    {
        $contents = file_get_contents($source);

        // Fail here with a precise message instead of storing false and
        // letting getInnerHTML() report that setSource() was never called.
        if ($contents === false) {
            throw new RuntimeException("Error: Unable to read source '{$source}'");
        }

        $this->source = $contents;

        return $this;
    }

    /**
     * Concatenate the inner HTML (or trimmed text) of every <$tag> element,
     * optionally restricted to elements carrying a given attribute value.
     *
     * @param string      $tag       Element name passed to getElementsByTagName().
     * @param string|null $id        Filter written as "attribute=value"; the value
     *                               is everything after the first "=". null or ''
     *                               selects every <$tag> element.
     * @param bool        $nodeValue When truthy, collect trimmed nodeValue instead
     *                               of inner HTML.
     *
     * @return string|null Concatenated result, or null when no element matched.
     *
     * @throws Exception                When no source has been loaded.
     * @throws InvalidArgumentException When $id is not "attribute=value".
     * @throws RuntimeException         When the source cannot be parsed.
     */
    public function getInnerHTML($tag, $id = null, $nodeValue = false)
    {
        // Explicit check rather than empty(): a source of "0" is valid content.
        if (!is_string($this->source) || $this->source === '') {
            throw new Exception('Error: Missing $this->source, use setSource() first');
        }

        // Parse the filter once, outside the loop. Limiting the split to two
        // parts keeps values that themselves contain "=" (e.g. URLs) intact.
        $filter = null;
        if ($id !== null && $id !== '') {
            $filter = explode('=', $id, 2);
            if (count($filter) !== 2 || $filter[0] === '') {
                throw new InvalidArgumentException(
                    "Error: \$id must look like 'attribute=value', got '{$id}'"
                );
            }
        }

        // loadHTML() raises E_WARNING for markup libxml dislikes; collect
        // those internally while parsing, then restore the caller's setting.
        $previous = libxml_use_internal_errors(true);
        try {
            $loaded = $this->loadHTML($this->source);
        } finally {
            if (!$previous) {
                // Only discard errors we started collecting ourselves.
                libxml_clear_errors();
            }
            libxml_use_internal_errors($previous);
        }

        if ($loaded === false) {
            throw new RuntimeException('Error: Unable to parse HTML source');
        }

        // Starts as null so "nothing matched" stays distinguishable from
        // "matched, but empty".
        $html = null;

        foreach ($this->getElementsByTagName($tag) as $element) {
            // Strict comparison: with ==, numeric-looking strings such as
            // "10" and "1e1" would be treated as the same value.
            if ($filter !== null && $element->getAttribute($filter[0]) !== $filter[1]) {
                continue;
            }

            $html .= $nodeValue
                ? trim($element->nodeValue)
                : $this->innerHTML($element);
        }

        return $html;
    }

    /**
     * Serialise the children of a node to an HTML string.
     *
     * Each child is imported into a fresh DOMDocument and written out with
     * saveHTML(); every fragment is trimmed before concatenation, so a child
     * that serialises to whitespace only contributes nothing.
     *
     * @param DOMNode $document Node whose children are serialised.
     *
     * @return string
     */
    protected function innerHTML($document)
    {
        $html = '';

        foreach ($document->childNodes as $child) {
            $fragment = new DOMDocument();
            $fragment->appendChild($fragment->importNode($child, true));
            $html .= trim($fragment->saveHTML());
        }

        return $html;
    }
}