: str_replace(): Passing null to parameter #2 ($replace) of type array|string is deprecated in
* Website: http://sourceforge.net/projects/simplehtmldom/
* Additional projects: http://sourceforge.net/projects/debugobject/
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
* Licensed under The MIT License
* See the LICENSE file in the project root for more information.
* Version Rev. 1.9.1 (291)
// Node type identifiers; simple_html_dom_node::$nodetype holds one of these
// (it defaults to HDOM_TYPE_TEXT).
define('HDOM_TYPE_ELEMENT', 1);
define('HDOM_TYPE_COMMENT', 2);
define('HDOM_TYPE_TEXT', 3);
define('HDOM_TYPE_ENDTAG', 4);
define('HDOM_TYPE_ROOT', 5);
define('HDOM_TYPE_UNKNOWN', 6);
// Quote-style identifiers (presumably for attribute values - confirm against usage).
// NOTE(review): the values are 0, 1 and 3; 2 is not assigned.
define('HDOM_QUOTE_DOUBLE', 0);
define('HDOM_QUOTE_SINGLE', 1);
define('HDOM_QUOTE_NO', 3);
// Keys into a node's `_` info array, e.g. $node->_[HDOM_INFO_INNER].
define('HDOM_INFO_BEGIN', 0);
define('HDOM_INFO_END', 1);
define('HDOM_INFO_QUOTE', 2);
define('HDOM_INFO_SPACE', 3);
define('HDOM_INFO_TEXT', 4);
define('HDOM_INFO_INNER', 5);
define('HDOM_INFO_OUTER', 6);
define('HDOM_INFO_ENDSPACE', 7);
// Overridable defaults: each constant is defined only when it does not already
// exist, so code that runs before this file can supply its own value.
// Default value of the $target_charset parameter.
defined('DEFAULT_TARGET_CHARSET') || define('DEFAULT_TARGET_CHARSET', 'UTF-8');
// Default value of the $defaultBRText parameter.
defined('DEFAULT_BR_TEXT') || define('DEFAULT_BR_TEXT', "\r\n");
// Default value of the $defaultSpanText parameter.
defined('DEFAULT_SPAN_TEXT') || define('DEFAULT_SPAN_TEXT', ' ');
// Maximum content length, as measured by strlen(), accepted before parsing.
defined('MAX_FILE_SIZE') || define('MAX_FILE_SIZE', 600000);
// NOTE(review): not referenced in the visible part of the file; presumably a
// parser option flag - confirm against usage.
define('HDOM_SMARTY_AS_TEXT', 1);
* Checks that the node holds data before returning it.
* @param simple_html_dom_node $node
* @param string $method Name of the node method to call when no attribute name is given.
* @param string $attr_name
* @return string Data from the node if found, or an empty string otherwise.
function embedpress_get_markup_from_node( $node, $method='innertext', $attr_name=''){
// Only query $node when it is a non-empty object.
if ( !empty( $node) && is_object( $node) ) {
// A non-empty attribute name is handled first: return that attribute's value.
if ( !empty( $attr_name) ) {
return $node->getAttribute( $attr_name );
// Otherwise call the method named by $method, but only if the node has such a method.
if ( !empty( $method) && method_exists( $node, $method) ) {
return $node->{$method}();
* @param bool $use_include_path
* @param bool $forceTagsClosed
* @param string $target_charset
* @param string $defaultBRText
* @param string $defaultSpanText
* @return simple_html_dom|false
$use_include_path = false,
$target_charset = DEFAULT_TARGET_CHARSET,
$defaultBRText = DEFAULT_BR_TEXT,
$defaultSpanText = DEFAULT_SPAN_TEXT)
if($maxLen <= 0) { $maxLen = MAX_FILE_SIZE; }
/**@var simple_html_dom $dom */
$dom = new simple_html_dom(
* For sourceforge users: uncomment the next line and comment the
* retrieve_url_contents line 2 lines down if it is not already done.
$contents = file_get_contents(
// $contents = retrieve_url_contents($url);
if (empty($contents) || strlen($contents) > $maxLen) {
return $dom->load($contents, $lowercase, $stripRN);
* @param bool $forceTagsClosed
* @param string $target_charset
* @param string $defaultBRText
* @param string $defaultSpanText
* @return simple_html_dom|false
$target_charset = DEFAULT_TARGET_CHARSET,
$defaultBRText = DEFAULT_BR_TEXT,
$defaultSpanText = DEFAULT_SPAN_TEXT)
$dom = new simple_html_dom(
if (empty($str) || strlen($str) > MAX_FILE_SIZE) {
return $dom->load($str, $lowercase, $stripRN);
function dump_html_tree($node, $show_attr = true, $deep = 0)
class simple_html_dom_node
public $nodetype = HDOM_TYPE_TEXT;
public $children = array();
function __construct($dom)
return $this->outertext();
function dump($show_attr = true, $depth = 0)
echo str_repeat("\t", $depth) . $this->tag;
if ($show_attr && count($this->attr) > 0) {
foreach ($this->attr as $k => $v) {
foreach ($this->nodes as $node) {
$node->dump($show_attr, $depth + 1);
function dump_node($echo = true)
if (count($this->attr) > 0) {
foreach ($this->attr as $k => $v) {
$string .= "[$k]=>\"$v\", ";
if (count($this->_) > 0) {
foreach ($this->_ as $k => $v) {
foreach ($v as $k2 => $v2) {
$string .= "[$k2]=>\"$v2\", ";
$string .= "[$k]=>\"$v\", ";
if (isset($this->text)) {
$string .= " text: ({$this->text})";
$string .= ' HDOM_INNER_INFO: ';
if (isset($node->_[HDOM_INFO_INNER])) {
$string .= "'" . $node->_[HDOM_INFO_INNER] . "'";
$string .= ' children: ' . count($this->children);
$string .= ' nodes: ' . count($this->nodes);
$string .= ' tag_start: ' . $this->tag_start;
function parent($parent = null)
// I am SURE that this doesn't work properly.
// It fails to unset the current node from its current parent's nodes or
$this->parent->nodes[] = $this;
$this->parent->children[] = $this;
return !empty($this->children);
function children($idx = -1)
if (isset($this->children[$idx])) {
return $this->children[$idx];
if (count($this->children) > 0) {
return $this->children[0];
if (count($this->children) > 0) {
return end($this->children);
if ($this->parent === null) {
$idx = array_search($this, $this->parent->children, true);
if ($idx !== false && isset($this->parent->children[$idx + 1])) {
return $this->parent->children[$idx + 1];
if ($this->parent === null) {
$idx = array_search($this, $this->parent->children, true);
if ($idx !== false && $idx > 0) {
return $this->parent->children[$idx - 1];
function find_ancestor_tag($tag)
if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
if ($this->parent === null) {
$ancestor = $this->parent;
while (!is_null($ancestor)) {
if (is_object($debug_object)) {
$debug_object->debug_log(2, 'Current tag is: ' . $ancestor->tag);
if ($ancestor->tag === $tag) {
$ancestor = $ancestor->parent;
if (isset($this->_[HDOM_INFO_INNER])) {
return $this->_[HDOM_INFO_INNER];
if (isset($this->_[HDOM_INFO_TEXT])) {
return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
foreach ($this->nodes as $n) {
if (is_object($debug_object)) {
if ($this->tag === 'text') {
if (!empty($this->text)) {
$text = ' with text: ' . $this->text;
$debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text);
if ($this->tag === 'root') {
return $this->innertext();
// todo: What is the use of this callback? Remove?
if ($this->dom && $this->dom->callback !== null) {
call_user_func_array($this->dom->callback, array($this));
if (isset($this->_[HDOM_INFO_OUTER])) {
return $this->_[HDOM_INFO_OUTER];
if (isset($this->_[HDOM_INFO_TEXT])) {
return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]) {
$ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
if (isset($this->_[HDOM_INFO_INNER])) {
// todo: <br> should either never have HDOM_INFO_INNER or always
if ($this->tag !== 'br') {
$ret .= $this->_[HDOM_INFO_INNER];
} elseif ($this->nodes) {
foreach ($this->nodes as $n) {
$ret .= $this->convert_text($n->outertext());
if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0) {
$ret .= '</' . $this->tag . '>';
if (isset($this->_[HDOM_INFO_INNER])) {
return $this->_[HDOM_INFO_INNER];
switch ($this->nodetype) {
case HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
case HDOM_TYPE_COMMENT: return '';
case HDOM_TYPE_UNKNOWN: return '';
if (strcasecmp($this->tag, 'script') === 0) { return ''; }
if (strcasecmp($this->tag, 'style') === 0) { return ''; }
// In rare cases (always node type 1, i.e. HDOM_TYPE_ELEMENT; observed
// for some span tags and some p tags), $this->nodes is set to NULL.
// NOTE: This indicates a problem: something sets it to NULL
// without an obvious cause.
// WHY is this happening?
if (!is_null($this->nodes)) {
foreach ($this->nodes as $n) {
// Start paragraph after a blank line
$ret = trim($ret) . "\n\n";
$ret .= $this->convert_text($n->text());
// If this node is a span... add a space at the end of it so
// multiple spans don't run into each other. This is plaintext
if ($n->tag === 'span') {
$ret .= $this->dom->default_span_text;