: str_replace(): Passing null to parameter #2 ($replace) of type array|string is deprecated in
$ret = $this->innertext();
$ret = str_ireplace('<![CDATA[', '', $ret);
$ret = str_replace(']]>', '', $ret);
// text, comment, unknown
if (isset($this->_[HDOM_INFO_TEXT])) {
return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
foreach ($this->attr as $key => $val) {
// skip removed attribute
if ($val === null || $val === false) { continue; }
$ret .= $this->_[HDOM_INFO_SPACE][$i][0];
//no value attr: nowrap, checked selected...
switch ($this->_[HDOM_INFO_QUOTE][$i])
case HDOM_QUOTE_DOUBLE: $quote = '"'; break;
case HDOM_QUOTE_SINGLE: $quote = '\''; break;
. $this->_[HDOM_INFO_SPACE][$i][1]
. $this->_[HDOM_INFO_SPACE][$i][2]
$ret = $this->dom->restore_noise($ret);
return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>';
/**
 * Find descendant nodes of this node that match a CSS-style selector.
 *
 * The selector string is parsed by parse_selector() into a list of selector
 * groups; each group is a list of levels that are resolved one after another
 * by calling seek() on every node found for the previous level.
 *
 * @param string   $selector  Selector string handed to parse_selector().
 * @param int|null $idx       null returns every match as an array; a
 *                            non-negative value returns that match; a
 *                            negative value counts back from the end.
 * @param bool     $lowercase Forwarded unchanged to seek().
 * @return mixed Array of nodes when $idx is null, otherwise the node at
 *               $idx or null when that index does not exist. An empty array
 *               is returned when nothing can be parsed or this node has no
 *               HDOM_INFO_BEGIN entry.
 */
function find($selector, $idx = null, $lowercase = false)
$selectors = $this->parse_selector($selector);
// Nothing parsed: there is nothing to search for.
if (($count = count($selectors)) === 0) { return array(); }
// Each entry of $selectors is resolved independently of the others.
for ($c = 0; $c < $count; ++$c) {
// The change on the below line was documented on the sourceforge
// code tracker id 2788009
// used to be: if (($levle=count($selectors[0]))===0) return array();
if (($levle = count($selectors[$c])) === 0) { return array(); }
// A node without a begin index cannot seed the search.
if (!isset($this->_[HDOM_INFO_BEGIN])) { return array(); }
// $head maps node index => 1 for the nodes the current level starts from;
// the search is seeded with this node itself.
$head = array($this->_[HDOM_INFO_BEGIN] => 1);
$cmd = ' '; // Combinator
// handle descendant selectors, no recursive!
for ($l = 0; $l < $levle; ++$l) {
foreach ($head as $k => $v) {
// Key -1 stands for the document root, any other key indexes $this->dom->nodes.
$n = ($k === -1) ? $this->dom->root : $this->dom->nodes[$k];
//PaperG - Pass this optional parameter on to the seek function.
// seek() reports its matches through $ret (passed by reference).
$n->seek($selectors[$c][$l], $ret, $cmd, $lowercase);
// Element 4 of the level just processed is the combinator for the next level.
$cmd = $selectors[$c][$l][4]; // Next Combinator
foreach ($head as $k => $v) {
// $found_keys is keyed by node index, so a node is only recorded once.
if (!isset($found_keys[$k])) {
// Turn the collected node indexes back into node objects.
foreach ($found_keys as $k => $v) {
$found[] = $this->dom->nodes[$k];
// return nth-element or array
if (is_null($idx)) { return $found; }
// A negative index is converted to an offset from the end of the result list.
elseif ($idx < 0) { $idx = count($found) + $idx; }
return (isset($found[$idx])) ? $found[$idx] : null;
/**
 * Collect the nodes that match one parsed selector level, relative to this
 * node and the combinator that links it to the previous level.
 *
 * @param array  $selector   One parsed selector level: tag, id, class list,
 *                           attribute list and combinator, in that order.
 * @param array  $ret        Output, passed by reference: every match is
 *                           recorded as $ret[<node index>] = 1.
 * @param string $parent_cmd Combinator to apply: ' ' (descendant),
 *                           '>' (child), '+' (next sibling) or
 *                           '~' (subsequent sibling).
 * @param bool   $lowercase  Presumably enables case-insensitive comparison of
 *                           attribute values (see the comment near the
 *                           strtolower() call below) - TODO confirm.
 * @return void Results are delivered through $ret only.
 */
protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
// Unpack the selector level into its five components.
list($tag, $id, $class, $attributes, $cmb) = $selector;
// Step 1: build the candidate list $nodes according to the combinator.
if ($parent_cmd === ' ') { // Descendant Combinator
// Find parent closing tag if the current element doesn't have a closing
// tag (i.e. void element)
$end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
// NOTE(review): the condition reads $parent->_ before testing
// $parent !== null, so the null test cannot guard the property access -
// the two operands look swapped; confirm against the full source.
while (!isset($parent->_[HDOM_INFO_END]) && $parent !== null) {
$parent = $parent->parent;
$end += $parent->_[HDOM_INFO_END];
// Get list of target nodes
// Candidates are the nodes stored after this node's begin index, up to $end.
$nodes_start = $this->_[HDOM_INFO_BEGIN] + 1;
$nodes_count = $end - $nodes_start;
// preserve_keys = true keeps the original node indexes as array keys.
$nodes = array_slice($this->dom->nodes, $nodes_start, $nodes_count, true);
} elseif ($parent_cmd === '>') { // Child Combinator
$nodes = $this->children;
// NOTE(review): in_array() is called without the strict flag here and in the
// '~' branch, so objects are compared loosely, while the array_search()
// calls that follow are strict - confirm whether that is intended.
} elseif ($parent_cmd === '+'
&& in_array($this, $this->parent->children)) { // Next-Sibling Combinator
// Position of the sibling that directly follows this node.
$index = array_search($this, $this->parent->children, true) + 1;
if ($index < count($this->parent->children))
$nodes[] = $this->parent->children[$index];
} elseif ($parent_cmd === '~'
&& in_array($this, $this->parent->children)) { // Subsequent Sibling Combinator
// The slice starts at this node's own position in the parent's children.
$index = array_search($this, $this->parent->children, true);
$nodes = array_slice($this->parent->children, $index);
// Go through each element starting at this element until the end tag
// Note: If this element is a void tag, any previous void element is
// Step 2: test every candidate; $pass tracks whether it still matches.
foreach($nodes as $node) {
// Handle 'text' selector
if($pass && $tag === 'text' && $node->tag === 'text') {
// Text nodes are recorded under their key in $this->dom->nodes, found by identity.
$ret[array_search($node, $this->dom->nodes, true)] = 1;
// Skip if node isn't a child node (i.e. text nodes)
if($pass && !in_array($node, $node->parent->children, true)) {
// Skip if tag doesn't match
// An empty tag or '*' in the selector accepts every tag name.
if ($pass && $tag !== '' && $tag !== $node->tag && $tag !== '*') {
// Skip if ID doesn't exist
if ($pass && $id !== '' && !isset($node->attr['id'])) {
if ($pass && $id !== '' && isset($node->attr['id'])) {
// Note: Only consider the first ID (as browsers do)
$node_id = explode(' ', trim($node->attr['id']))[0];
if($id !== $node_id) { $pass = false; }
// Check if all class(es) exist
if ($pass && $class !== '' && is_array($class) && !empty($class)) {
if (isset($node->attr['class'])) {
// Class names of the node are split on single spaces and lowercased
// before being compared with the selector's classes.
$node_classes = explode(' ', $node->attr['class']);
$node_classes = array_map('strtolower', $node_classes);
if(!in_array($c, $node_classes)) {
&& !empty($attributes)) {
foreach($attributes as $a) {
// Handle indexing attributes (i.e. "[2]")
* Note: This is not supported by the CSS Standard but adds
* the ability to select items compatible to XPath (i.e.
* the 3rd element within it's parent).
* Note: This doesn't conflict with the CSS Standard which
* doesn't work on numeric attributes anyway.
if (is_numeric($att_name)
// Find index of current element in parent
// Only siblings with the same tag name as $node are counted.
foreach ($node->parent->children as $c) {
if ($c->tag === $node->tag) ++$count;
// If this is the correct node, continue with next
if ($count === (int)$att_name) continue;
// Check attribute availability
if ($att_inv) { // Attribute should NOT be set
if (isset($node->attr[$att_name])) {
} else { // Attribute should be set
// todo: "plaintext" is not a valid CSS selector!
if ($att_name !== 'plaintext'
&& !isset($node->attr[$att_name])) {
// Continue with next attribute if expression isn't defined
if ($att_expr === '') continue;
// If they have told us that this is a "plaintext"
// search then we want the plaintext of the node - right?
// todo "plaintext" is not a valid CSS selector!
if ($att_name === 'plaintext') {
$nodeKeyValue = $node->text();
$nodeKeyValue = $node->attr[$att_name];
if (is_object($debug_object)) {
$debug_object->debug_log(2,
. ' where nodes value is: '
// If lowercase is set, do a case insensitive test of
// the value of the selector.
strtolower($nodeKeyValue),
if (is_object($debug_object)) {
$debug_object->debug_log(2,
. ($check ? 'true' : 'false')
// Found a match. Add to list and clear node
// Matches are keyed by the node's begin index, so the same node is stored only once.
if ($pass) $ret[$node->_[HDOM_INFO_BEGIN]] = 1;
// It's passed by reference so this is actually what this function returns.
if (is_object($debug_object)) {
$debug_object->debug_log(1, 'EXIT - ret: ', $ret);
/**
 * Test an attribute value against a selector pattern using the comparison
 * selected by $exp.
 *
 * @param string $exp              Comparison operator taken from the selector
 *                                 (the case labels that dispatch on it are
 *                                 not visible in this excerpt).
 * @param string $pattern          Value written in the selector.
 * @param string $value            Actual value read from the node.
 * @param string $case_sensitivity 'i' lowercases both $pattern and $value
 *                                 before they are compared.
 * @return mixed Boolean for the comparison-based branches; the preg_match()
 *               branches return its raw result (1, 0 or false).
 */
protected function match($exp, $pattern, $value, $case_sensitivity)
if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
// Case-insensitive matching is done by lowercasing both operands up front.
if ($case_sensitivity === 'i') {
$pattern = strtolower($pattern);
$value = strtolower($value);
// Exact match.
return ($value === $pattern);
// Anything except an exact match.
return ($value !== $pattern);
// NOTE(review): the three preg_match() branches return an int (or false on
// error), not a bool like the other branches - callers must not compare the
// result with === true.
// $value starts with $pattern; preg_quote() makes the pattern literal.
return preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
// $value ends with $pattern.
return preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
// $value contains $pattern anywhere.
return preg_match('/' . preg_quote($pattern, '/') . '/', $value);
* Represents an element with the att attribute, its value
* either being exactly "val" or beginning with "val"
* immediately followed by "-" (U+002D).
// NOTE(review): this accepts any value that merely starts with $pattern; the
// rule quoted above additionally requires an exact match or a "-" directly
// after the prefix - confirm whether the looser test is intended.
return strpos($value, $pattern) === 0;
* Represents an element with the att attribute whose value is a
* whitespace-separated list of words, one of which is exactly
* "val". If "val" contains whitespace, it will never represent
* anything (since the words are separated by spaces). Also if
* "val" is the empty string, it will never represent anything.
// Strict membership test against the words of $value, split on single spaces.
return in_array($pattern, explode(' ', trim($value)), true);
protected function parse_selector($selector_string)
if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
* Pattern of CSS selectors, modified from mootools (https://mootools.net/)
* Paperg: Add the colon to the attribute, so that it properly finds
* <tag attr:ibute="something" > like google does.
* Note: if you try to look at this attribute, you MUST use getAttribute
* since $dom->x:y will fail the php syntax check.
* Notice the \[ starting the attribute? and the @? following? This
* implies that an attribute can begin with an @ sign that is not
* captured. This implies that an html attribute specifier may start
* with an @ sign that is NOT captured by the expression. Further study
* is required to determine if this should be documented or removed.
* Matches selectors in this order:
* Matches the tag name consisting of zero or more words, colons,
* Optionally matches an id name, consisting of a "#" followed by
* the id name (one or more words and hyphens).
* [3] - class names (including dots)
* Optionally matches a list of classes, consisting of a "."
* followed by the class name (one or more words and hyphens)
* where multiple classes can be chained (i.e. ".foo.bar.baz")
* ((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?
* Optionally matches the attributes list
* Matches the selector list separator
// phpcs:ignore Generic.Files.LineLength
$pattern = "/([\w:\*-]*)(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?([\/, >+~]+)/is";
trim($selector_string) . ' ', // Add final ' ' as pseudo separator
if (is_object($debug_object)) {
$debug_object->debug_log(2, 'Matches Array: ', $matches);
foreach ($matches as $m) {
if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') { continue; }
if ($this->dom->lowercase) {
$m[1] = strtolower($m[1]);
if ($m[3] !== '') { $m[3] = explode('.', $m[3]); }
/* Extract attributes (pattern based on the pattern above!)
* [2] - attribute expression
* Note: Attributes can be negated with a "!" prefix to their name
"/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
// Replace element by array
foreach($attributes as $att) {
if(trim($att[0]) === '') { continue; }
$inverted = (isset($att[1][0]) && $att[1][0] === '!');
$inverted ? substr($att[1], 1) : $att[1], // Name
(isset($att[2])) ? $att[2] : '', // Expression
(isset($att[3])) ? $att[3] : '', // Value
$inverted, // Inverted Flag
(isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity
if ($m[5] !== '' && trim($m[5]) === '') { // Descendant Separator
} else { // Other Separator
// Clear Separator if it's a Selector List
if ($is_list = ($m[5] === ',')) { $m[5] = ''; }
// Remove full match before adding to results