: str_replace(): Passing null to parameter #2 ($replace) of type array|string is deprecated in
// The start tag cannot contain another start tag, if so add as text
if ($pos = strpos($tag, '<') !== false) {
$tag = '<' . substr($tag, 0, -1);
$node->_[HDOM_INFO_TEXT] = $tag;
$this->link_nodes($node, false);
$this->char = $this->doc[--$this->pos]; // prev
// Handle invalid tag names (i.e. "<html#doc>")
if (!preg_match('/^\w[\w:-]*$/', $tag)) {
$node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');
// Next char is the beginning of a new tag, don't touch it.
if ($this->char === '<') {
$this->link_nodes($node, false);
// Next char closes current tag, add and be done with it.
if ($this->char === '>') { $node->_[HDOM_INFO_TEXT] .= '>'; }
$this->link_nodes($node, false);
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
// begin tag, add new node
$node->nodetype = HDOM_TYPE_ELEMENT;
$tag_lower = strtolower($tag);
$node->tag = ($this->lowercase) ? $tag_lower : $tag;
// handle optional closing tags
if (isset($this->optional_closing_tags[$tag_lower])) {
// Traverse ancestors to close all optional closing tags
while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
$this->parent->_[HDOM_INFO_END] = 0;
$this->parent = $this->parent->parent;
$node->parent = $this->parent;
$guard = 0; // prevent infinity loop
// [0] Space between tag and first attribute
$space = array($this->copy_skip($this->token_blank), '', '');
// Everything until the first equal sign should be the attribute name
$name = $this->copy_until($this->token_equal);
if ($name === '' && $this->char !== null && $space[0] === '') {
if ($guard === $this->pos) { // Escape infinite loop
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
// Out of bounds before the tag ended
if ($this->pos >= $this->size - 1 && $this->char !== '>') {
$node->nodetype = HDOM_TYPE_TEXT;
$node->_[HDOM_INFO_END] = 0;
$node->_[HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
$this->link_nodes($node, false);
// Attributes cannot start after opening tag
if ($this->doc[$this->pos - 1] == '<') {
$node->nodetype = HDOM_TYPE_TEXT;
$node->_[HDOM_INFO_END] = 0;
$node->_[HDOM_INFO_TEXT] = substr(
$this->pos - $begin_tag_pos - 1
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
$this->link_nodes($node, false);
if ($name !== '/' && $name !== '') { // this is a attribute name
// [1] Whitespace after attribute name
$space[1] = $this->copy_skip($this->token_blank);
$name = $this->restore_noise($name); // might be a noisy name
if ($this->lowercase) { $name = strtolower($name); }
if ($this->char === '=') { // attribute with value
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
$this->parse_attr($node, $name, $space); // get attribute value
//no value attr: nowrap, checked selected...
$node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
$node->attr[$name] = true;
if ($this->char != '>') { $this->char = $this->doc[--$this->pos]; } // prev
$node->_[HDOM_INFO_SPACE][] = $space;
// prepare for next attribute
$this->copy_skip($this->token_blank),
} else { // no more attributes
} while ($this->char !== '>' && $this->char !== '/'); // go until the tag ended
$this->link_nodes($node, true);
$node->_[HDOM_INFO_ENDSPACE] = $space[0];
// handle empty tags (i.e. "<div/>")
if ($this->copy_until_char('>') === '/') {
$node->_[HDOM_INFO_ENDSPACE] .= '/';
$node->_[HDOM_INFO_END] = 0;
if (!isset($this->self_closing_tags[strtolower($node->tag)])) {
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
// If it's a BR tag, we need to set it's text to the default text.
// This way when we see it in plaintext, we can generate formatting that the user wants.
// since a br tag never has sub nodes, this works well.
if ($node->tag === 'br') {
$node->_[HDOM_INFO_INNER] = $this->default_br_text;
/**
 * Read the value of attribute $name at the current parse position and
 * store it on $node.
 *
 * The visible statements cover three value forms (double-quoted,
 * single-quoted and unquoted). Each records the matching HDOM_QUOTE_*
 * constant; the value is then passed through restore_noise() and has
 * "\r" and "\n" removed before being stored.
 *
 * NOTE(review): the branching that selects one of the three forms is not
 * visible here - presumably it dispatches on $this->char; confirm.
 *
 * @param object $node  Node receiving the attribute; written through
 *                      $node->attr and $node->_[HDOM_INFO_QUOTE].
 * @param string $name  Attribute name, used as the key into $node->attr.
 * @param array  $space Passed by reference; index 2 receives the blank run
 *                      between "=" and the value.
 */
protected function parse_attr($node, $name, &$space)
// True when the node already carries an attribute with this name.
$is_duplicate = isset($node->attr[$name]);
if (!$is_duplicate) // Copy whitespace between "=" and value
$space[2] = $this->copy_skip($this->token_blank);
// Double-quoted form: move one character forward, copy everything up to
// the next '"', then move one character forward again.
$quote_type = HDOM_QUOTE_DOUBLE;
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
$value = $this->copy_until_char('"');
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
// Single-quoted form: same steps, copying up to the next "'".
$quote_type = HDOM_QUOTE_SINGLE;
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
$value = $this->copy_until_char('\'');
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
// Unquoted form: copy until any character listed in $this->token_attr.
$quote_type = HDOM_QUOTE_NO;
$value = $this->copy_until($this->token_attr);
// The raw value is run through restore_noise() before it is cleaned up.
$value = $this->restore_noise($value);
// PaperG: Attributes should not contain \r or \n, so both are stripped
// from the value.
$value = str_replace("\r", '', $value);
$value = str_replace("\n", '', $value);
// PaperG: If this is a "class" selector, let's get rid of the preceding
// and trailing space since some people leave it in the multi class case.
// NOTE(review): no trimming statement is visible after this comment - confirm it exists.
// Record the quote style and the cleaned value on the node.
$node->_[HDOM_INFO_QUOTE][] = $quote_type;
$node->attr[$name] = $value;
/**
 * Attach $node to the node currently held in $this->parent.
 *
 * @param object $node     Node to attach (passed by reference); its parent
 *                         property is set to $this->parent.
 * @param bool   $is_child NOTE(review): not read in the visible lines -
 *                         presumably it gates the append to
 *                         $this->parent->children; confirm.
 */
protected function link_nodes(&$node, $is_child)
$node->parent = $this->parent;
// Append to the parent's list of all nodes.
$this->parent->nodes[] = $node;
// Append to the parent's list of children.
$this->parent->children[] = $node;
/**
 * Create a new node whose HDOM_INFO_TEXT entry is the literal closing tag
 * '</' . $tag . '>', link it under the current parent and advance the
 * cursor by one character.
 *
 * @param string $tag Tag name placed between "</" and ">".
 */
protected function as_text_node($tag)
$node = new simple_html_dom_node($this);
$node->_[HDOM_INFO_TEXT] = '</' . $tag . '>';
// Linked with $is_child = false.
$this->link_nodes($node, false);
// Step forward; the current character becomes null at the end of the document.
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
/**
 * Advance the cursor past the run of characters from $chars that starts at
 * the current position.
 *
 * @param string $chars Characters to skip, used as the strspn() mask.
 */
protected function skip($chars)
// strspn() returns the length of the matching run starting at $this->pos.
$this->pos += strspn($this->doc, $chars, $this->pos);
// Refresh the current character; null once the cursor is at or past $this->size.
$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
/**
 * Return the run of characters from $chars that starts at offset $pos in
 * the document, or '' when that run is empty.
 *
 * NOTE(review): $pos is not assigned and $this->pos is not advanced in the
 * visible lines - presumably $pos is the cursor on entry and the cursor is
 * moved forward by $len; confirm.
 *
 * @param string $chars Characters to match, used as the strspn() mask.
 * @return string The matched run, or '' if nothing matched.
 */
protected function copy_skip($chars)
// Length of the leading run made up only of characters in $chars.
$len = strspn($this->doc, $chars, $pos);
// Refresh the current character; null once the cursor is at or past $this->size.
$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
if ($len === 0) { return ''; }
return substr($this->doc, $pos, $len);
/**
 * Return the text starting at offset $pos that contains none of the
 * characters in $chars.
 *
 * NOTE(review): $pos is not assigned and $this->pos is not advanced in the
 * visible lines - presumably $pos is the cursor on entry and the cursor is
 * moved forward by $len; confirm.
 *
 * @param string $chars Stop characters, used as the strcspn() mask.
 * @return string The copied text (may be empty).
 */
protected function copy_until($chars)
// Length of the leading run that contains no character from $chars.
$len = strcspn($this->doc, $chars, $pos);
// Refresh the current character; null once the cursor is at or past $this->size.
$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
return substr($this->doc, $pos, $len);
/**
 * Copy text from the document up to, but not including, the next occurrence
 * of $char at or after the current position.
 *
 * NOTE(review): $pos_old is not assigned, $ret is not returned and
 * $this->pos is not updated on the found path in the visible lines -
 * presumably those statements exist; confirm.
 *
 * @param string $char Character (needle for strpos()) to stop at.
 * @return string The copied text; '' when there is no current character or
 *                when $char is found exactly at the current position.
 */
protected function copy_until_char($char)
// Nothing left to read.
if ($this->char === null) { return ''; }
// $char does not occur again: take the rest of the document and move the
// cursor to the end.
if (($pos = strpos($this->doc, $char, $this->pos)) === false) {
$ret = substr($this->doc, $this->pos, $this->size - $this->pos);
$this->pos = $this->size;
// $char sits exactly at the cursor: there is nothing to copy.
if ($pos === $this->pos) { return ''; }
// The current character becomes the match itself.
$this->char = $this->doc[$pos];
return substr($this->doc, $pos_old, $pos - $pos_old);
/**
 * Replace matched spans of $this->doc with placeholder keys and keep the
 * removed text in $this->noise under those keys.
 *
 * NOTE(review): the call that fills $matches and $count is not visible -
 * presumably preg_match_all($pattern, ...) using the flags shown below;
 * confirm.
 *
 * @param string $pattern    Presumably the PCRE pattern describing the noise
 *                           to remove; its use is not visible here.
 * @param bool   $remove_tag true replaces the entire match (index 0),
 *                           false replaces only the first submatch (index 1).
 */
protected function remove_noise($pattern, $remove_tag = false)
if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
// Flags fragment: one entry per match, each capture paired with its offset.
PREG_SET_ORDER | PREG_OFFSET_CAPTURE
// Walk the matches from last to first; replacing later spans first keeps
// the captured offsets of the earlier ones valid.
for ($i = $count - 1; $i > -1; --$i) {
// Placeholder key: '___noise___' followed by a number space-padded to
// width 5, starting at 1000 (16 characters while the number is below 100000).
$key = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);
if (is_object($debug_object)) {
$debug_object->debug_log(2, 'key is: ' . $key);
$idx = ($remove_tag) ? 0 : 1; // 0 = entire match, 1 = submatch
// Remember the removed text, then swap it for the key at its captured offset.
$this->noise[$key] = $matches[$i][$idx][0];
$this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
// reset the length of content
$this->size = strlen($this->doc);
// Reset the current character to the first character of the document.
$this->char = $this->doc[0];
/**
 * Process every '___noise___' placeholder found in $text.
 *
 * NOTE(review): the extraction of $key, the if/else structure around the
 * three rewrites below and the return statement are not visible here -
 * presumably a known key is replaced by $this->noise[$key] and the
 * resulting text is returned; confirm.
 *
 * @param string $text Text that may contain '___noise___' placeholders.
 */
function restore_noise($text)
if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
// Repeat until no placeholder marker is left in the text.
while (($pos = strpos($text, '___noise___')) !== false) {
// Sometimes there is a broken piece of markup, and we don't GET the
// pos+11 etc... token which indicates a problem outside of us...
// todo: "___noise___1000" (or any number with four or more digits)
// in the DOM causes an infinite loop which could be utilized by
// Only treat the marker as a full 16-character key when at least 16
// characters remain from $pos.
if (strlen($text) > $pos + 15) {
if (is_object($debug_object)) {
$debug_object->debug_log(2, 'located key of: ' . $key);
// Key is registered: rebuild the text around the 16-character placeholder.
if (isset($this->noise[$key])) {
$text = substr($text, 0, $pos)
. substr($text, $pos + 16);
// do this to prevent an infinite loop.
$text = substr($text, 0, $pos)
. 'UNDEFINED NOISE FOR KEY: '
. substr($text, $pos + 16);
// There is no valid key being given back to us... We must get
// rid of the ___noise___ or we will have a problem.
// Drops only the 11-character '___noise___' marker.
$text = substr($text, 0, $pos)
. substr($text, $pos + 11);
/**
 * Scan the stored noise fragments for one that contains $text.
 *
 * NOTE(review): the statement executed on a hit is not visible here -
 * presumably the matching fragment is returned; confirm.
 *
 * @param string $text Substring to look for inside each noise fragment.
 */
function search_noise($text)
if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
foreach($this->noise as $noiseElement) {
// Substring test: true when $text occurs anywhere in the fragment.
if (strpos($noiseElement, $text) !== false) {
return $this->root->innertext();
return $this->root->innertext();
return $this->root->innertext();
return $this->root->text();
return $this->_target_charset;
/**
 * Forward to childNodes() on the root node.
 *
 * @param int $idx Passed through unchanged; defaults to -1.
 * @return mixed Whatever $this->root->childNodes($idx) returns.
 */
function childNodes($idx = -1)
return $this->root->childNodes($idx);
return $this->root->first_child();
return $this->root->last_child();
/**
 * Build an element by parsing the markup "<$name>$value</$name>" with
 * str_get_html() and returning firstChild() of the result.
 *
 * $name and $value are interpolated into the markup as-is (no escaping);
 * a null $value interpolates as an empty string.
 *
 * @param string      $name  Tag name used for both the opening and closing tag.
 * @param string|null $value Content placed between the tags.
 * @return mixed Result of firstChild() on the parsed markup.
 */
function createElement($name, $value = null)
// The @ operator suppresses any error raised by this expression.
return @str_get_html("<$name>$value</$name>")->firstChild();
/**
 * Parse $value with str_get_html() and return the last entry of the
 * resulting object's nodes array.
 *
 * @param string $value Text handed to str_get_html().
 * @return mixed Last element of the nodes array, as returned by end().
 */
function createTextNode($value)
// The @ operator suppresses any error raised by this expression.
return @end(str_get_html($value)->nodes);
/**
 * Shortcut for find() with the selector "#$id" and index 0.
 *
 * @param string $id Id value appended to "#" to form the selector (not escaped).
 * @return mixed Whatever find() returns for index 0.
 */
function getElementById($id)
return $this->find("#$id", 0);
/**
 * Shortcut for find() with the selector "#$id".
 *
 * @param string   $id  Id value appended to "#" to form the selector (not escaped).
 * @param int|null $idx Passed through to find() unchanged; defaults to null.
 * @return mixed Whatever find() returns.
 */
function getElementsById($id, $idx = null)
return $this->find("#$id", $idx);
/**
 * Shortcut for find() with $name as the selector and index 0.
 *
 * @param string $name Selector passed to find() unchanged.
 * @return mixed Whatever find() returns for index 0.
 */
function getElementByTagName($name)
return $this->find($name, 0);
/**
 * Shortcut for find() with $name as the selector.
 *
 * @param string $name Selector passed to find() unchanged.
 * @param int    $idx  Passed through to find() unchanged; defaults to -1.
 * @return mixed Whatever find() returns.
 */
function getElementsByTagName($name, $idx = -1)
return $this->find($name, $idx);