: str_replace(): Passing null to parameter #2 ($replace) of type array|string is deprecated in
namespace Yoast\WP\SEO\Builders;
use WP_HTML_Tag_Processor;
use Yoast\WP\SEO\Helpers\Image_Helper;
use Yoast\WP\SEO\Helpers\Indexable_Helper;
use Yoast\WP\SEO\Helpers\Options_Helper;
use Yoast\WP\SEO\Helpers\Post_Helper;
use Yoast\WP\SEO\Helpers\Url_Helper;
use Yoast\WP\SEO\Models\Indexable;
use Yoast\WP\SEO\Models\SEO_Links;
use Yoast\WP\SEO\Repositories\Indexable_Repository;
use Yoast\WP\SEO\Repositories\SEO_Links_Repository;
/**
 * Indexable link builder.
 */
class Indexable_Link_Builder {
/**
 * The SEO links repository.
 *
 * @var SEO_Links_Repository
 */
protected $seo_links_repository;

/**
 * The URL helper.
 *
 * Declared explicitly because the constructor assigns it; undeclared
 * (dynamic) properties are deprecated as of PHP 8.2.
 *
 * @var Url_Helper
 */
protected $url_helper;

/**
 * The image helper. Injected through set_dependencies().
 *
 * @var Image_Helper
 */
protected $image_helper;

/**
 * The indexable helper.
 *
 * @var Indexable_Helper
 */
protected $indexable_helper;

/**
 * The post helper.
 *
 * @var Post_Helper
 */
protected $post_helper;

/**
 * The options helper.
 *
 * @var Options_Helper
 */
protected $options_helper;

/**
 * The indexable repository. Injected through set_dependencies().
 *
 * @var Indexable_Repository
 */
protected $indexable_repository;
/**
 * Indexable_Link_Builder constructor.
 *
 * @param SEO_Links_Repository $seo_links_repository The SEO links repository.
 * @param Url_Helper           $url_helper           The URL helper.
 * @param Post_Helper          $post_helper          The post helper.
 * @param Options_Helper       $options_helper       The options helper.
 * @param Indexable_Helper     $indexable_helper     The indexable helper.
 */
public function __construct(
	SEO_Links_Repository $seo_links_repository,
	Url_Helper $url_helper,
	Post_Helper $post_helper,
	Options_Helper $options_helper,
	Indexable_Helper $indexable_helper
) {
	$this->seo_links_repository = $seo_links_repository;
	// The URL helper is a real constructor argument; it must be part of the signature to be assignable here.
	$this->url_helper       = $url_helper;
	$this->post_helper      = $post_helper;
	$this->options_helper   = $options_helper;
	$this->indexable_helper = $indexable_helper;
}
/**
 * Sets the indexable repository and the image helper.
 *
 * These two collaborators are supplied through this setter rather than
 * through the constructor.
 *
 * @param Indexable_Repository $indexable_repository The indexable repository.
 * @param Image_Helper         $image_helper         The image helper.
 *
 * @return void
 */
public function set_dependencies(
	Indexable_Repository $indexable_repository,
	Image_Helper $image_helper
) {
	$this->image_helper         = $image_helper;
	$this->indexable_repository = $indexable_repository;
}
/**
 * Builds the links for a post.
 *
 * For indexables of object type 'post' the content is first run through the
 * 'the_content' filter with the global post set up, after which links and
 * images are gathered, stored as SEO links and counted on the indexable.
 *
 * @param Indexable $indexable The indexable.
 * @param string    $content   The content. Expected to be unfiltered.
 *
 * @return SEO_Links[] The created SEO links.
 */
public function build( $indexable, $content ) {
	if ( ! $this->indexable_helper->should_index_indexable( $indexable ) ) {
		return [];
	}

	global $post;
	if ( $indexable->object_type === 'post' ) {
		// Remember the current global post so it can be put back once the filter has run.
		$post_backup = $post;
		// phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited -- To setup the post we need to do this explicitly.
		$post = $this->post_helper->get_post( $indexable->object_id );
		\setup_postdata( $post );
		$content = \apply_filters( 'the_content', $content );
		\wp_reset_postdata();
		// phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited -- To setup the post we need to do this explicitly.
		$post = $post_backup;
	}

	// Escape the CDATA terminator; replacing it with itself would be a no-op.
	$content = \str_replace( ']]>', ']]&gt;', $content );
	$links   = $this->gather_links( $content );
	$images  = $this->gather_images( $content );

	if ( empty( $links ) && empty( $images ) ) {
		$indexable->link_count = 0;
		$this->update_related_indexables( $indexable, [] );

		return [];
	}

	$links = $this->create_links( $indexable, $links, $images );

	$this->update_related_indexables( $indexable, $links );

	$indexable->link_count = $this->get_internal_link_count( $links );

	return $links;
}
/**
 * Deletes all SEO links for an indexable.
 *
 * The links are looked up before they are deleted so that the indexables
 * they pointed at can have their incoming links updated afterwards.
 *
 * @param Indexable $indexable The indexable.
 *
 * @return void
 */
public function delete( $indexable ) {
	$existing_links = $this->seo_links_repository->find_all_by_indexable_id( $indexable->id );
	$this->seo_links_repository->delete_all_by_indexable_id( $indexable->id );

	$target_ids = [];
	foreach ( $existing_links as $existing_link ) {
		if ( ! $existing_link->target_indexable_id ) {
			continue;
		}

		$target_ids[] = $existing_link->target_indexable_id;
	}

	$this->update_incoming_links_for_related_indexables( $target_ids );
}
/**
 * Fixes existing SEO links that are supposed to have a target indexable but don't, because of prior indexable
 * cleanup.
 *
 * @param Indexable $indexable The indexable to be the target of SEO Links.
 *
 * @return void
 */
public function patch_seo_links( Indexable $indexable ) {
	if ( empty( $indexable->id ) || empty( $indexable->object_id ) ) {
		return;
	}

	$candidates  = $this->seo_links_repository->find_all_by_target_post_id( $indexable->object_id );
	$has_patched = false;

	foreach ( $candidates as $candidate ) {
		if ( ! ( $candidate instanceof SEO_Links ) || ! empty( $candidate->target_indexable_id ) ) {
			continue;
		}

		// Since that post ID exists in an SEO link but has no target_indexable_id, it's probably because of prior indexable cleanup.
		$this->seo_links_repository->update_target_indexable_id( $candidate->id, $indexable->id );
		$has_patched = true;
	}

	if ( ! $has_patched ) {
		return;
	}

	$updated_indexable_id = [ $indexable->id ];
	$this->update_incoming_links_for_related_indexables( $updated_indexable_id );
}
/**
 * Gathers all links from content.
 *
 * @param string $content The content.
 *
 * @return string[] An array of urls.
 */
protected function gather_links( $content ) {
	if ( \strpos( $content, 'href' ) === false ) {
		// Nothing to do.
		return [];
	}

	// Always start from an array so a content without matching anchors still yields a list.
	$links  = [];
	$regexp = '<a\s[^>]*href=("??)([^" >]*?)\1[^>]*>';
	// Used modifiers iU to match case insensitive and make greedy quantifiers lazy.
	if ( \preg_match_all( "/$regexp/iU", $content, $matches, \PREG_SET_ORDER ) ) {
		foreach ( $matches as $match ) {
			// The pattern only strips double quotes; single quotes around the URL are trimmed here.
			$links[] = \trim( $match[2], "'" );
		}
	}

	return $links;
}
/**
 * Gathers all images from content with WP's WP_HTML_Tag_Processor() and returns them along with their IDs, if
 * possible.
 *
 * @param string $content The content.
 *
 * @return int[] An associated array of image IDs, keyed by their URL.
 */
protected function gather_images_wp( $content ) {
	$processor = new WP_HTML_Tag_Processor( $content );
	$images    = [];
	$charset   = \get_bloginfo( 'charset' );

	// Only visit <img> tags.
	$query = [
		'tag_name' => 'img',
	];

	/**
	 * Filter 'wpseo_image_attribute_containing_id' - Allows filtering what attribute will be used to extract image IDs from.
	 *
	 * Defaults to "class", which is where WP natively stores the image IDs, in a `wp-image-<ID>` format.
	 *
	 * @api string The attribute to be used to extract image IDs from.
	 */
	$attribute = \apply_filters( 'wpseo_image_attribute_containing_id', 'class' );

	while ( $processor->next_tag( $query ) ) {
		$src_raw = $processor->get_attribute( 'src' );

		// A missing or value-less src attribute is not a string; skip it rather than pass it to htmlentities().
		if ( ! \is_string( $src_raw ) || $src_raw === '' ) {
			continue;
		}

		$src     = \htmlentities( $src_raw, ( \ENT_QUOTES | \ENT_SUBSTITUTE | \ENT_HTML401 ), $charset );
		$classes = $processor->get_attribute( $attribute );
		$id      = $this->extract_id_of_classes( $classes );

		$images[ $src ] = $id;
	}

	return $images;
}
/**
 * Gathers all images from content with DOMDocument() and returns them along with their IDs, if possible.
 *
 * @param string $content The content.
 *
 * @return int[] An associated array of image IDs, keyed by their URL.
 */
protected function gather_images_domdocument( $content ) {
	$images  = [];
	$charset = \get_bloginfo( 'charset' );

	/**
	 * Filter 'wpseo_image_attribute_containing_id' - Allows filtering what attribute will be used to extract image IDs from.
	 *
	 * Defaults to "class", which is where WP natively stores the image IDs, in a `wp-image-<ID>` format.
	 *
	 * @api string The attribute to be used to extract image IDs from.
	 */
	$attribute = \apply_filters( 'wpseo_image_attribute_containing_id', 'class' );

	// Collect parse errors internally instead of emitting warnings for imperfect HTML.
	\libxml_use_internal_errors( true );
	// Fully qualified so the global class is used regardless of this file's namespace imports.
	$post_dom = new \DOMDocument();
	$post_dom->loadHTML( '<?xml encoding="' . $charset . '">' . $content );
	// Drop the collected errors so the libxml error buffer does not keep growing.
	\libxml_clear_errors();

	foreach ( $post_dom->getElementsByTagName( 'img' ) as $img ) {
		$src     = \htmlentities( $img->getAttribute( 'src' ), ( \ENT_QUOTES | \ENT_SUBSTITUTE | \ENT_HTML401 ), $charset );
		$classes = $img->getAttribute( $attribute );
		$id      = $this->extract_id_of_classes( $classes );

		$images[ $src ] = $id;
	}

	return $images;
}
/**
 * Extracts image ID out of the image's classes.
 *
 * @param string $classes The classes assigned to the image.
 *
 * @return int The ID that's extracted from the classes, or 0 when none could be extracted.
 */
protected function extract_id_of_classes( $classes ) {
	// The attribute may be absent or value-less, in which case there is nothing to match against.
	if ( ! \is_string( $classes ) || $classes === '' ) {
		return 0;
	}

	/**
	 * Filter 'wpseo_extract_id_pattern' - Allows filtering the regex pattern to be used to extract image IDs from class/attribute names.
	 *
	 * Defaults to the pattern that extracts image IDs from core's `wp-image-<ID>` native format in image classes.
	 *
	 * @api string The regex pattern to be used to extract image IDs from class names. Empty string if the whole class/attribute should be returned.
	 */
	$pattern = \apply_filters( 'wpseo_extract_id_pattern', '/(?<!\S)wp-image-(\d+)(?!\S)/i' );

	// An empty pattern means the whole attribute value is the ID; preg_match() would warn on an empty regex.
	if ( $pattern === '' ) {
		return (int) $classes;
	}

	$matches = [];

	if ( \preg_match( $pattern, $classes, $matches ) ) {
		return (int) $matches[1];
	}

	return 0;
}
/**
 * Gathers all images from content.
 *
 * Uses WP_HTML_Tag_Processor when that class exists, DOMDocument otherwise,
 * and falls back to a regular expression when neither is available or when
 * content parsing is disabled through the filters below.
 *
 * @param string $content The content.
 *
 * @return int[] An associated array of image IDs, keyed by their URLs.
 */
protected function gather_images( $content ) {

	/**
	 * Filter 'wpseo_force_creating_and_using_attachment_indexables' - Filters if we should use attachment indexables to find all content images, instead of scanning the content.
	 *
	 * The default value is false.
	 */
	$should_not_parse_content = \apply_filters( 'wpseo_force_creating_and_using_attachment_indexables', false );

	/**
	 * Filter 'wpseo_force_skip_image_content_parsing' - Filters if we should force skip scanning the content to parse images.
	 * This filter can be used if the regex gives a faster result than scanning the code.
	 *
	 * The default value is false.
	 */
	$should_not_parse_content = \apply_filters( 'wpseo_force_skip_image_content_parsing', $should_not_parse_content );

	if ( ! $should_not_parse_content && \class_exists( WP_HTML_Tag_Processor::class ) ) {
		return $this->gather_images_wp( $content );
	}

	// Fully qualified: an unqualified DOMDocument::class would resolve inside this file's namespace.
	if ( ! $should_not_parse_content && \class_exists( \DOMDocument::class ) ) {
		return $this->gather_images_domdocument( $content );
	}

	if ( \strpos( $content, 'src' ) === false ) {
		// Nothing to do.
		return [];
	}

	$images = [];
	$regexp = '<img\s[^>]*src=("??)([^" >]*?)\\1[^>]*>';
	// Used modifiers iU to match case insensitive and make greedy quantifiers lazy.
	if ( \preg_match_all( "/$regexp/iU", $content, $matches, \PREG_SET_ORDER ) ) {
		foreach ( $matches as $match ) {
			// The regex fallback cannot determine an attachment ID, so 0 is stored.
			$images[ $match[2] ] = 0;
		}
	}

	return $images;
}
/**
 * Creates link models from lists of URLs and image sources.
 *
 * @param Indexable $indexable The indexable.
 * @param string[]  $links     The link URLs.
 * @param int[]     $images    The image sources.
 *
 * @return SEO_Links[] The link models.
 */
protected function create_links( $indexable, $links, $images ) {
	$home_url    = \wp_parse_url( \home_url() );
	$current_url = \wp_parse_url( $indexable->permalink );

	// Turn every gathered URL into a link model.
	$links = \array_map(
		function ( $link ) use ( $home_url, $indexable ) {
			return $this->create_internal_link( $link, $home_url, $indexable );
		},
		$links
	);

	// Filter out links to the same page with a fragment or query.
	$links = \array_filter(
		$links,
		function ( $link ) use ( $current_url ) {
			return $this->filter_link( $link, $current_url );
		}
	);

	// Start from an empty list so content without images still merges cleanly.
	$image_links = [];
	foreach ( $images as $image_url => $image_id ) {
		$image_links[] = $this->create_internal_link( $image_url, $home_url, $indexable, true, $image_id );
	}

	return \array_merge( $links, $image_links );
}
/**
 * Get the post ID based on the link's type and its target's permalink.
 *
 * Internal links are resolved with url_to_postid(); any other type is
 * looked up as an attachment through the image helper.
 *
 * @param string $type      The type of link (either SEO_Links::TYPE_INTERNAL or SEO_Links::TYPE_INTERNAL_IMAGE).
 * @param string $permalink The permalink of the link's target.
 *
 * @return int The post ID.
 */
protected function get_post_id( $type, $permalink ) {
	if ( $type !== SEO_Links::TYPE_INTERNAL ) {
		return $this->image_helper->get_attachment_by_url( $permalink );
	}

	return \url_to_postid( $permalink );
}
* Creates an internal link.
* @param string $url The url of the link.
* @param array $home_url The home url, as parsed by wp_parse_url.
* @param Indexable $indexable The indexable of the post containing the link.
* @param bool $is_image Whether or not the link is an image.
* @param int $image_id The ID of the internal image.
* @return SEO_Links The created link.
protected function create_internal_link( $url, $home_url, $indexable, $is_image = false, $image_id = 0 ) {
$parsed_url = \wp_parse_url( $url );
$link_type = $this->url_helper->get_link_type( $parsed_url, $home_url, $is_image );
* ORM representing a link in the SEO Links table.
$model = $this->seo_links_repository->query()->create(
'indexable_id' => $indexable->id,
'post_id' => $indexable->object_id,
$model->parsed_url = $parsed_url;
if ( $model->type === SEO_Links::TYPE_INTERNAL ) {
$permalink = $this->build_permalink( $url, $home_url );
return $this->enhance_link_from_indexable( $model, $permalink );
if ( $model->type === SEO_Links::TYPE_INTERNAL_IMAGE ) {
$permalink = $this->build_permalink( $url, $home_url );
/** The `wpseo_force_creating_and_using_attachment_indexables` filter is documented in indexable-link-builder.php */
if ( ! $this->options_helper->get( 'disable-attachment' ) || \apply_filters( 'wpseo_force_creating_and_using_attachment_indexables', false ) ) {
$model = $this->enhance_link_from_indexable( $model, $permalink );
$target_post_id = ( $image_id !== 0 ) ? $image_id : WPSEO_Image_Utils::get_attachment_by_url( $permalink );
if ( ! empty( $target_post_id ) ) {
$model->target_post_id = $target_post_id;
if ( $model->target_post_id ) {
$file = \get_attached_file( $model->target_post_id );
if ( \file_exists( $file ) ) {
$model->size = \filesize( $file );