Edit File by line

<?php

[0] Fix | Delete

[1] Fix | Delete

namespace AmpProject\Dom;

[2] Fix | Delete

[3] Fix | Delete

use AmpProject\Amp;

[4] Fix | Delete

use AmpProject\Attribute;

[5] Fix | Delete

use AmpProject\DevMode;

[6] Fix | Delete

use AmpProject\Dom\Document\Encoding;

[7] Fix | Delete

use AmpProject\Dom\Document\Option;

[8] Fix | Delete

use AmpProject\Exception\FailedToRetrieveRequiredDomElement;

[9] Fix | Delete

use AmpProject\Exception\MaxCssByteCountExceeded;

[10] Fix | Delete

use AmpProject\Optimizer\CssRule;

[11] Fix | Delete

use AmpProject\Tag;

[12] Fix | Delete

use DOMAttr;

[13] Fix | Delete

use DOMComment;

[14] Fix | Delete

use DOMDocument;

[15] Fix | Delete

use DOMElement;

[16] Fix | Delete

use DOMNode;

[17] Fix | Delete

use DOMNodeList;

[18] Fix | Delete

use DOMText;

[19] Fix | Delete

use DOMXPath;

[20] Fix | Delete

[21] Fix | Delete

/**

[22] Fix | Delete

* Abstract away some of the difficulties of working with PHP's DOMDocument.

[23] Fix | Delete

[24] Fix | Delete

* @property DOMXPath $xpath XPath query object for this document.

[25] Fix | Delete

* @property Element $html The document's <html> element.

[26] Fix | Delete

* @property Element $head The document's <head> element.

[27] Fix | Delete

* @property Element $body The document's <body> element.

[28] Fix | Delete

* @property Element|null $viewport The document's viewport meta element.

[29] Fix | Delete

* @property DOMNodeList $ampElements The document's <amp-*> elements.

[30] Fix | Delete

* @property Element $ampCustomStyle The document's <style amp-custom> element.

[31] Fix | Delete

* @property int $ampCustomStyleByteCount Count of bytes of the CSS in the <style amp-custom> tag.

[32] Fix | Delete

* @property int $inlineStyleByteCount Count of bytes of the CSS in all of the inline style attributes.

[33] Fix | Delete

[34] Fix | Delete

* @package ampproject/amp-toolbox

[35] Fix | Delete

[36] Fix | Delete

final class Document extends DOMDocument

[37] Fix | Delete

{

[38] Fix | Delete

[39] Fix | Delete

/**

[40] Fix | Delete

* Default document type to use.

[41] Fix | Delete

[42] Fix | Delete

* @var string

[43] Fix | Delete

[44] Fix | Delete

const DEFAULT_DOCTYPE = '<!DOCTYPE html>';

[45] Fix | Delete

[46] Fix | Delete

/**

[47] Fix | Delete

* Regular expression to match the HTML doctype.

[48] Fix | Delete

[49] Fix | Delete

* @var string

[50] Fix | Delete

[51] Fix | Delete

const HTML_DOCTYPE_REGEX_PATTERN = '#<!doctype\s+html[^>]+?>#si';

[52] Fix | Delete

[53] Fix | Delete

/**

[54] Fix | Delete

* Attribute prefix for AMP-bind data attributes.

[55] Fix | Delete

[56] Fix | Delete

* @var string

[57] Fix | Delete

[58] Fix | Delete

const AMP_BIND_DATA_ATTR_PREFIX = 'data-amp-bind-';

[59] Fix | Delete

[60] Fix | Delete

/**

[61] Fix | Delete

* Pattern for HTML attribute accounting for binding attr name in square brackets syntax, boolean attribute,

[62] Fix | Delete

* single/double-quoted attribute value, and unquoted attribute values.

[63] Fix | Delete

[64] Fix | Delete

* @var string

[65] Fix | Delete

[66] Fix | Delete

const AMP_BIND_SQUARE_BRACKETS_ATTR_PATTERN = '#^\s+(?P<name>\[?[a-zA-Z0-9_\-]+\]?)'

[67] Fix | Delete

. '(?P<value>=(?>"[^"]*+"|\'[^\']*+\'|[^\'"\s]+))?#';

[68] Fix | Delete

[69] Fix | Delete

/**

[70] Fix | Delete

* Pattern for HTML attribute accounting for binding attr name in data attribute syntax, boolean attribute,

[71] Fix | Delete

* single/double-quoted attribute value, and unquoted attribute values.

[72] Fix | Delete

[73] Fix | Delete

* @var string

[74] Fix | Delete

[75] Fix | Delete

const AMP_BIND_DATA_ATTRIBUTE_ATTR_PATTERN = '#^\s+(?P<name>(?:'

[76] Fix | Delete

. self::AMP_BIND_DATA_ATTR_PREFIX

[77] Fix | Delete

. ')?[a-zA-Z0-9_\-]+)'

[78] Fix | Delete

. '(?P<value>=(?>"[^"]*+"|\'[^\']*+\'|[^\'"\s]+))?#';

[79] Fix | Delete

[80] Fix | Delete

/**

[81] Fix | Delete

* Match all start tags that contain a binding attribute in square brackets syntax.

[82] Fix | Delete

[83] Fix | Delete

* @var string

[84] Fix | Delete

[85] Fix | Delete

const AMP_BIND_SQUARE_START_PATTERN = '#<'

[86] Fix | Delete

. '(?P<name>[a-zA-Z0-9_\-]+)' // Tag name.

[87] Fix | Delete

. '(?P<attrs>\s+' // Attributes.

[88] Fix | Delete

. '(?>[^>"\'\[\]]+|"[^"]*+"|\'[^\']*+\')*+' // Non-binding attributes tokens.

[89] Fix | Delete

. '\[[a-zA-Z0-9_\-]+\]' // One binding attribute key.

[90] Fix | Delete

. '(?>[^>"\']+|"[^"]*+"|\'[^\']*+\')*+' // Any attribute tokens, including

[91] Fix | Delete

// binding ones.

[92] Fix | Delete

. ')>#s';

[93] Fix | Delete

[94] Fix | Delete

/**

[95] Fix | Delete

* Match all start tags that contain a binding attribute in data attribute syntax.

[96] Fix | Delete

[97] Fix | Delete

* @var string

[98] Fix | Delete

[99] Fix | Delete

const AMP_BIND_DATA_START_PATTERN = '#<'

[100] Fix | Delete

. '(?P<name>[a-zA-Z0-9_\-]+)' // Tag name.

[101] Fix | Delete

. '(?P<attrs>\s+' // Attributes.

[102] Fix | Delete

. '(?>' // Match at least one attribute

[103] Fix | Delete

. '(?>' // prefixed with "data-amp-bind-".

[104] Fix | Delete

. '(?![a-zA-Z0-9_\-\s]*'

[105] Fix | Delete

. self::AMP_BIND_DATA_ATTR_PREFIX

[106] Fix | Delete

. '[a-zA-Z0-9_\-]+="[^"]*+"|\'[^\']*+\')'

[107] Fix | Delete

. '[^>"\']+|"[^"]*+"|\'[^\']*+\''

[108] Fix | Delete

. ')*+'

[109] Fix | Delete

. '(?>[a-zA-Z0-9_\-\s]*'

[110] Fix | Delete

. self::AMP_BIND_DATA_ATTR_PREFIX

[111] Fix | Delete

. '[a-zA-Z0-9_\-]+'

[112] Fix | Delete

. ')'

[113] Fix | Delete

. ')+'

[114] Fix | Delete

. '(?>[^>"\']+|"[^"]*+"|\'[^\']*+\')*+' // Any attribute tokens, including

[115] Fix | Delete

// binding ones.

[116] Fix | Delete

. ')>#is';

[117] Fix | Delete

[118] Fix | Delete

[119] Fix | Delete

/*
 * Regular expressions to fetch the individual structural tags.
 * These patterns were optimized to avoid extreme backtracking on large documents.
 *
 * The start-tag patterns skip over any HTML comments that precede the tag, so that a document
 * starting with a comment is still recognized. The comment sub-pattern sits in an atomic group
 * so that a failed match does not retry every possible comment boundary.
 */
const HTML_STRUCTURE_DOCTYPE_PATTERN = '/^(?<doctype>[^<]*(?>\s*<!--.*?-->\s*)*<!doctype(?>\s+[^>]+)?>)/is';
const HTML_STRUCTURE_HTML_START_TAG = '/^(?<html_start>[^<]*(?>\s*<!--.*?-->\s*)*<html(?>\s+[^>]*)?>)/is';
const HTML_STRUCTURE_HTML_END_TAG = '/(?<html_end><\/html(?>\s+[^>]*)?>.*)$/is';
const HTML_STRUCTURE_HEAD_START_TAG = '/^[^<]*(?><!--.*?-->\s*)*(?><head(?>\s+[^>]*)?>)/is';
const HTML_STRUCTURE_BODY_START_TAG = '/^[^<]*(?><!--.*?-->\s*)*(?><body(?>\s+[^>]*)?>)/is';
const HTML_STRUCTURE_BODY_END_TAG = '/(?><\/body(?>\s+[^>]*)?>.*)$/is';
const HTML_STRUCTURE_HEAD_TAG = '/^(?>[^<]*(?><head(?>\s+[^>]*)?>).*?<\/head(?>\s+[^>]*)?>)/is';

[129] Fix | Delete

[130] Fix | Delete

// Regex patterns used for securing and restoring the doctype node.

[131] Fix | Delete

const HTML_SECURE_DOCTYPE_IF_NOT_FIRST_PATTERN = '/(^[^<]*(?>\s*<!--[^>]*>\s*)+<)(!)(doctype)(\s+[^>]+?)(>)/i';

[132] Fix | Delete

const HTML_RESTORE_DOCTYPE_PATTERN = '/(^[^<]*(?>\s*<!--[^>]*>\s*)+<)'

[133] Fix | Delete

. '(!--amp-)(doctype)(\s+[^>]+?)(-->)/i';

[134] Fix | Delete

[135] Fix | Delete

// Regex pattern used for removing Internet Explorer conditional comments.

[136] Fix | Delete

const HTML_IE_CONDITIONAL_COMMENTS_PATTERN = '/<!--(?>\[if\s|<!\[endif)(?>[^>]+(?<!--)>)*(?>[^>]+(?<=--)>)/i';

[137] Fix | Delete

[138] Fix | Delete

/**

[139] Fix | Delete

* Xpath query to fetch the attributes that are being URL-encoded by saveHTML().

[140] Fix | Delete

[141] Fix | Delete

* @var string

[142] Fix | Delete

[143] Fix | Delete

const XPATH_URL_ENCODED_ATTRIBUTES_QUERY = './/*/@src|.//*/@href|.//*/@action';

[144] Fix | Delete

[145] Fix | Delete

/**

[146] Fix | Delete

* Xpath query to fetch the elements containing Mustache templates (both <template type=amp-mustache> and

[147] Fix | Delete

* <script type=text/plain template=amp-mustache>).

[148] Fix | Delete

[149] Fix | Delete

* @var string

[150] Fix | Delete

[151] Fix | Delete

const XPATH_MUSTACHE_TEMPLATE_ELEMENTS_QUERY = './/self::template[ @type = "amp-mustache" ]'

[152] Fix | Delete

. '|//self::script[ @type = "text/plain" '

[153] Fix | Delete

. 'and @template = "amp-mustache" ]';

[154] Fix | Delete

[155] Fix | Delete

/**

[156] Fix | Delete

* Error message to use when the __get() is triggered for an unknown property.

[157] Fix | Delete

[158] Fix | Delete

* @var string

[159] Fix | Delete

[160] Fix | Delete

const PROPERTY_GETTER_ERROR_MESSAGE = 'Undefined property: AmpProject\\Dom\\Document::';

[161] Fix | Delete

[162] Fix | Delete

/**

[163] Fix | Delete

* Charset compatibility tag for making DOMDocument behave.

[164] Fix | Delete

[165] Fix | Delete

* See: http://php.net/manual/en/domdocument.loadhtml.php#78243.

[166] Fix | Delete

[167] Fix | Delete

* @var string

[168] Fix | Delete

[169] Fix | Delete

const HTTP_EQUIV_META_TAG = '<meta http-equiv="content-type" content="text/html; charset=utf-8">';

[170] Fix | Delete

[171] Fix | Delete

// Regex patterns and values used for adding and removing http-equiv charsets for compatibility.
// The opening tag patterns first consume any HTML comments that directly precede the tag, to make
// sure we don't match a <head> or <html> tag that sits within a comment.

const HTML_GET_HEAD_OPENING_TAG_PATTERN = '/(?><!--.*?-->\s*)*<head(?>\s+[^>]*)?>/is';
const HTML_GET_HEAD_OPENING_TAG_REPLACEMENT = '$0' . self::HTTP_EQUIV_META_TAG;
const HTML_GET_HTML_OPENING_TAG_PATTERN = '/(?><!--.*?-->\s*)*<html(?>\s+[^>]*)?>/is';
const HTML_GET_HTML_OPENING_TAG_REPLACEMENT = '$0<head>' . self::HTTP_EQUIV_META_TAG . '</head>';

[178] Fix | Delete

const HTML_GET_HTTP_EQUIV_TAG_PATTERN = '#<meta http-equiv=([\'"])content-type\1 '

[179] Fix | Delete

. 'content=([\'"])text/html; '

[180] Fix | Delete

. 'charset=utf-8\2>#i';

[181] Fix | Delete

const HTML_HTTP_EQUIV_VALUE = 'content-type';

[182] Fix | Delete

const HTML_HTTP_EQUIV_CONTENT_VALUE = 'text/html; charset=utf-8';

[183] Fix | Delete

[184] Fix | Delete

// Regex patterns used for finding tags or extracting attribute values in an HTML string.

[185] Fix | Delete

const HTML_FIND_TAG_WITHOUT_ATTRIBUTE_PATTERN = '/<%1$s[^>]*?>[^<]*(?><\/%1$s>)?/i';

[186] Fix | Delete

const HTML_FIND_TAG_WITH_ATTRIBUTE_PATTERN = '/<%1$s [^>]*?\s*%2$s\s*=[^>]*?>[^<]*(?><\/%1$s>)?/i';

[187] Fix | Delete

const HTML_EXTRACT_ATTRIBUTE_VALUE_PATTERN = '/%s=(?>([\'"])(?<full>.*)?\1|(?<partial>[^ \'";]+))/';

[188] Fix | Delete

const HTML_FIND_TAG_DELIMITER = '/';

[189] Fix | Delete

[190] Fix | Delete

/**

[191] Fix | Delete

* Pattern to match an AMP emoji together with its variant (amp4ads, amp4email, ...).

[192] Fix | Delete

[193] Fix | Delete

* @var string

[194] Fix | Delete

[195] Fix | Delete

const AMP_EMOJI_ATTRIBUTE_PATTERN = '/<html\s([^>]*?(?:'

[196] Fix | Delete

. Attribute::AMP_EMOJI_ALT

[197] Fix | Delete

. '|'

[198] Fix | Delete

. Attribute::AMP_EMOJI

[199] Fix | Delete

. ')(4(?:ads|email))?[^>]*?)>/i';

[200] Fix | Delete

[201] Fix | Delete

// Attribute to use as a placeholder to move the emoji AMP symbol (⚡) over to DOM.

[202] Fix | Delete

const EMOJI_AMP_ATTRIBUTE_PLACEHOLDER = 'emoji-amp';

[203] Fix | Delete

[204] Fix | Delete

// Patterns used for fixing the mangled encoding of src attributes with SVG data.

[205] Fix | Delete

const I_AMPHTML_SIZER_REGEX_PATTERN = '/(?<before_src><i-amphtml-sizer\s+[^>]*>\s*<img\s+[^>]*?\s+src=([\'"]))'

[206] Fix | Delete

. '(?<src>.*?)'

[207] Fix | Delete

. '(?<after_src>\2><\/i-amphtml-sizer>)/i';

[208] Fix | Delete

const SRC_SVG_REGEX_PATTERN = '/^\s*(?<type>[^<]+)(?<value><svg[^>]+>)\s*$/i';

[209] Fix | Delete

[210] Fix | Delete

/**

[211] Fix | Delete

* XPath query to retrieve all <amp-*> tags, relative to the <body> node.

[212] Fix | Delete

[213] Fix | Delete

* @var string

[214] Fix | Delete

[215] Fix | Delete

const XPATH_AMP_ELEMENTS_QUERY = ".//*[starts-with(name(), 'amp-')]";

[216] Fix | Delete

[217] Fix | Delete

/**

[218] Fix | Delete

* XPath query to retrieve the <style amp-custom> tag, relative to the <head> node.

[219] Fix | Delete

[220] Fix | Delete

* @var string

[221] Fix | Delete

[222] Fix | Delete

const XPATH_AMP_CUSTOM_STYLE_QUERY = './/style[@amp-custom]';

[223] Fix | Delete

[224] Fix | Delete

/**

[225] Fix | Delete

* XPath query to fetch the inline style attributes, relative to the <body> node.

[226] Fix | Delete

[227] Fix | Delete

* @var string

[228] Fix | Delete

[229] Fix | Delete

const XPATH_INLINE_STYLE_ATTRIBUTES_QUERY = './/@style';

[230] Fix | Delete

[231] Fix | Delete

/**

[232] Fix | Delete

* Associative array of options to configure the behavior of the DOM document abstraction.

[233] Fix | Delete

[234] Fix | Delete

* @see Option::DEFAULTS For a list of available options.

[235] Fix | Delete

[236] Fix | Delete

* @var array

[237] Fix | Delete

[238] Fix | Delete

private $options;

[239] Fix | Delete

[240] Fix | Delete

/**

[241] Fix | Delete

* Whether `data-ampdevmode` was initially set on the document element.

[242] Fix | Delete

[243] Fix | Delete

* @var bool

[244] Fix | Delete

[245] Fix | Delete

private $hasInitialAmpDevMode = false;

[246] Fix | Delete

[247] Fix | Delete

/**

[248] Fix | Delete

* The original encoding of how the AmpProject\Dom\Document was created.

[249] Fix | Delete

[250] Fix | Delete

* This is stored to do an automatic conversion to UTF-8, which is

[251] Fix | Delete

* a requirement for AMP.

[252] Fix | Delete

[253] Fix | Delete

* @var string

[254] Fix | Delete

[255] Fix | Delete

private $originalEncoding;

[256] Fix | Delete

[257] Fix | Delete

/**

[258] Fix | Delete

* Store the <noscript> markup that was extracted to preserve it during parsing.

[259] Fix | Delete

[260] Fix | Delete

* The array keys are the element IDs for placeholder <meta> tags.

[261] Fix | Delete

[262] Fix | Delete

* @see maybeReplaceNoscriptElements()

[263] Fix | Delete

* @see maybeRestoreNoscriptElements()

[264] Fix | Delete

[265] Fix | Delete

* @var string[]

[266] Fix | Delete

[267] Fix | Delete

private $noscriptPlaceholderComments = [];

[268] Fix | Delete

[269] Fix | Delete

/**

[270] Fix | Delete

* Store whether mustache template tags were replaced and need to be restored.

[271] Fix | Delete

[272] Fix | Delete

* @see replaceMustacheTemplateTokens()

[273] Fix | Delete

[274] Fix | Delete

* @var bool

[275] Fix | Delete

[276] Fix | Delete

private $mustacheTagsReplaced = false;

[277] Fix | Delete

[278] Fix | Delete

/**

[279] Fix | Delete

* Whether we had secured a doctype that needs restoring or not.

[280] Fix | Delete

[281] Fix | Delete

* This is an int as it receives the $count from the preg_replace().

[282] Fix | Delete

[283] Fix | Delete

* @var int

[284] Fix | Delete

[285] Fix | Delete

private $securedDoctype = 0;

[286] Fix | Delete

[287] Fix | Delete

/**

[288] Fix | Delete

* Whether the self-closing tags were transformed and need to be restored.

[289] Fix | Delete

[290] Fix | Delete

* This avoids duplicating this effort (maybe corrupting the DOM) on multiple calls to saveHTML().

[291] Fix | Delete

[292] Fix | Delete

* @var bool

[293] Fix | Delete

[294] Fix | Delete

private $selfClosingTagsTransformed = false;

[295] Fix | Delete

[296] Fix | Delete

/**

[297] Fix | Delete

* Store the emoji that was used to represent the AMP attribute.

[298] Fix | Delete

[299] Fix | Delete

* There are a few variations, so we want to keep track of this.

[300] Fix | Delete

[301] Fix | Delete

* @see https://github.com/ampproject/amphtml/issues/25990

[302] Fix | Delete

[303] Fix | Delete

* @var string

[304] Fix | Delete

[305] Fix | Delete

private $usedAmpEmoji;

[306] Fix | Delete

[307] Fix | Delete

/**

[308] Fix | Delete

* Store the current index by prefix.

[309] Fix | Delete

[310] Fix | Delete

* This is used to generate unique-per-prefix IDs.

[311] Fix | Delete

[312] Fix | Delete

* @var int[]

[313] Fix | Delete

[314] Fix | Delete

private $indexCounter = [];

[315] Fix | Delete

[316] Fix | Delete

/**

[317] Fix | Delete

* The maximum number of bytes of CSS that is enforced.

[318] Fix | Delete

[319] Fix | Delete

* A negative number will disable the byte count limit.

[320] Fix | Delete

[321] Fix | Delete

* @var int

[322] Fix | Delete

[323] Fix | Delete

private $cssMaxByteCountEnforced = -1;

[324] Fix | Delete

[325] Fix | Delete

/**

[326] Fix | Delete

* Store the names of the amp-bind attributes that were converted so that we can restore them later on.

[327] Fix | Delete

[328] Fix | Delete

* @var array<string>

[329] Fix | Delete

[330] Fix | Delete

private $convertedAmpBindAttributes = [];

[331] Fix | Delete

[332] Fix | Delete

/**
 * Creates a new AmpProject\Dom\Document object.
 *
 * The requested encoding is only remembered as the original encoding of the document; the
 * underlying DOMDocument is always constructed with Encoding::AMP. Element nodes created by
 * this document are instantiated as Element instead of DOMElement.
 *
 * @link https://php.net/manual/domdocument.construct.php
 *
 * @param string $version  Optional. The version number of the document as part of the XML declaration.
 *                         An empty value falls back to '1.0'.
 * @param string $encoding Optional. The encoding of the document as part of the XML declaration.
 *                         An empty value is recorded as Encoding::UNKNOWN.
 */
public function __construct($version = '', $encoding = null)
{
    // Remember what the caller asked for before handing a fixed encoding to the parent.
    $requestedEncoding = (string)$encoding;
    if (! $requestedEncoding) {
        $requestedEncoding = Encoding::UNKNOWN;
    }
    $this->originalEncoding = $requestedEncoding;

    $xmlVersion = $version ? $version : '1.0';
    parent::__construct($xmlVersion, Encoding::AMP);

    // Have the DOM extension hand out our own Element subclass for element nodes.
    $this->registerNodeClass(DOMElement::class, Element::class);

    $this->options = Option::DEFAULTS;
}

[347] Fix | Delete

[348] Fix | Delete

/**
 * Named constructor to provide a convenient way of transforming HTML into a DOM.
 *
 * The markup is loaded through loadHTML() on a freshly created document.
 *
 * @param string       $html    HTML to turn into a DOM.
 * @param array|string $options Optional. Array of options to configure the document. Used as encoding if a string
 *                              is passed. Defaults to an empty array.
 * @return Document|false DOM generated from provided HTML, or false if the transformation failed.
 */
public static function fromHtml($html, $options = [])
{
    // For BC reasons, a plain string is interpreted as the encoding option.
    $options = is_string($options) ? [Option::ENCODING => $options] : $options;

    $encoding = null;
    if (isset($options[Option::ENCODING])) {
        $encoding = $options[Option::ENCODING];
    }

    $document = new self('', $encoding);

    return $document->loadHTML($html, $options) ? $document : false;
}

[373] Fix | Delete

[374] Fix | Delete

/**
 * Named constructor to provide a convenient way of transforming a HTML fragment into a DOM.
 *
 * The difference to Document::fromHtml() is that fragments are not normalized as to their structure:
 * the markup is passed to loadHTMLFragment() instead of loadHTML().
 *
 * @param string       $html    HTML to turn into a DOM.
 * @param array|string $options Optional. Array of options to configure the document. Used as encoding if a string
 *                              is passed. Defaults to an empty array.
 * @return Document|false DOM generated from provided HTML, or false if the transformation failed.
 */
public static function fromHtmlFragment($html, $options = [])
{
    // For BC reasons, a plain string is interpreted as the encoding option.
    if (is_string($options)) {
        $options = [Option::ENCODING => $options];
    }

    $hasEncoding = isset($options[Option::ENCODING]);
    $fragmentDom = new self('', $hasEncoding ? $options[Option::ENCODING] : null);

    if ($fragmentDom->loadHTMLFragment($html, $options)) {
        return $fragmentDom;
    }

    return false;
}

[401] Fix | Delete

[402] Fix | Delete

/**
 * Named constructor to provide a convenient way of retrieving the DOM from a node.
 *
 * If the node already belongs to (or is) an AmpProject\Dom\Document, that document is returned
 * unchanged. Otherwise a new document is created, a deep copy of the node is imported into it,
 * and $node is re-pointed at that copy.
 *
 * @param DOMNode $node Node to retrieve the DOM from. This is being modified by reference (!).
 * @return Document DOM that owns the (possibly re-imported) node.
 */
public static function fromNode(DOMNode &$node)
{
    // If the node->ownerDocument returns null, the node is the document.
    $root = $node->ownerDocument === null ? $node : $node->ownerDocument;

    if ($root instanceof self) {
        return $root;
    }

    $dom = new self();

    // We replace the $node by reference, to make sure the next lines of code will
    // work as expected with the new document.
    // Otherwise $dom and $node would refer to two different DOMDocuments.
    $node = $dom->importNode($node, true);
    $dom->appendChild($node);

    // The document element is null when the imported node is not an element (for example a
    // comment node), so guard the attribute lookup instead of calling a method on null.
    $documentElement = $dom->documentElement;
    $dom->hasInitialAmpDevMode = $documentElement !== null
        && $documentElement->hasAttribute(DevMode::DEV_MODE_ATTRIBUTE);

    return $dom;
}

[435] Fix | Delete

[436] Fix | Delete

/**
 * Reset the internal optimizations of the Document object.
 *
 * This might be needed if you are doing an operation that causes the cached
 * nodes and XPath objects to point to the wrong document. The xpath, head and
 * body properties are unset on this instance.
 *
 * @return self Reset version of the Document object.
 */
private function reset()
{
    // Drop references to old DOM document, one cached property at a time.
    unset($this->xpath);
    unset($this->head);
    unset($this->body);

    // Reference of the document itself doesn't change here, but might need to change in the future.
    return $this;
}

[452] Fix | Delete

[453] Fix | Delete

/**
 * Load HTML from a string.
 *
 * The source is first passed through normalizeDocumentStructure() and then loaded as a
 * fragment. When loading succeeds, a series of clean-up steps is run on the resulting DOM.
 *
 * @link https://php.net/manual/domdocument.loadhtml.php
 *
 * @param string           $source  The HTML string.
 * @param array|int|string $options Optional. Array of options to configure the document. Used as additional Libxml
 *                                  parameters if an int or string is passed. Defaults to an empty array.
 * @return bool true on success or false on failure.
 */
public function loadHTML($source, $options = [])
{
    $normalizedSource = $this->normalizeDocumentStructure($source);
    $success = $this->loadHTMLFragment($normalizedSource, $options);

    // Nothing to clean up when the markup could not be loaded.
    if (! $success) {
        return $success;
    }

    $this->insertMissingCharset();

    // Do some further clean-up.
    foreach ([Tag::HEAD, Tag::BODY] as $structuralTag) {
        $this->deduplicateTag($structuralTag);
    }
    $this->moveInvalidHeadNodesToBody();
    $this->movePostBodyNodesToBody();
    $this->convertHeadProfileToLink();

    return $success;
}

[481] Fix | Delete

[482] Fix | Delete

/**

[483] Fix | Delete

* Load a HTML fragment from a string.

[484] Fix | Delete

[485] Fix | Delete

* @param string $source The HTML fragment string.

[486] Fix | Delete

* @param array|int|string $options Optional. Array of options to configure the document. Used as additional Libxml

[487] Fix | Delete

* parameters if an int or string is passed. Defaults to an empty array.

[488] Fix | Delete

* @return bool true on success or false on failure.

[489] Fix | Delete

[490] Fix | Delete

public function loadHTMLFragment($source, $options = [])

[491] Fix | Delete

{

[492] Fix | Delete

// Assume options are the additional libxml flags if a string or int is passed, for BC reasons.

[493] Fix | Delete

if (is_string($options)) {

[494] Fix | Delete

$options = (int) $options;

[495] Fix | Delete

}

[496] Fix | Delete

if (is_int($options)) {

[497] Fix | Delete

$options = [Option::LIBXML_FLAGS => $options];

[498] Fix | Delete

}

[499] Fix | Delete

12 3 4 5