: str_replace(): Passing null to parameter #2 ($replace) of type array|string is deprecated in
* Sequence matcher for Diff
* Copyright (c) 2009 Chris Boulton <chris.boulton@interspire.com>
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the Chris Boulton nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* @author Chris Boulton <chris.boulton@interspire.com>
* @copyright (c) 2009 Chris Boulton
* @license New BSD License http://www.opensource.org/licenses/bsd-license.php
* @link http://github.com/chrisboulton/php-diff
class Diff_SequenceMatcher
* @var string|array Either a string or an array containing a callback function to determine if a line is "junk" or not.
private $junkCallback = null;
* @var array The first sequence to compare against.
* @var array The second sequence.
* @var array Array of characters that are considered junk from the second sequence. Characters are the array key.
private $junkDict = array();
* @var array Array of indices that do not contain junk elements.
private $options = array();
private $defaultOptions = array(
'ignoreNewLines' => false,
'ignoreWhitespace' => false,
private $matchingBlocks = null;
private $fullBCount = null;
* The constructor. With the sequences being passed, they'll be set for the
* sequence matcher and it will perform a basic cleanup & calculate junk
* @param string|array $a A string or array containing the lines to compare against.
* @param string|array $b A string or array containing the lines to compare.
* @param string|array $junkCallback Either an array or string that references a callback function (if there is one) to determine 'junk' characters.
public function __construct($a, $b, $junkCallback=null, $options=array())
$this->junkCallback = $junkCallback;
$this->setOptions($options);
$this->setSequences($a, $b);
public function setOptions($options)
$this->options = array_merge($this->defaultOptions, $options);
* Set the first and second sequences to use with the sequence matcher.
* @param string|array $a A string or array containing the lines to compare against.
* @param string|array $b A string or array containing the lines to compare.
public function setSequences($a, $b)
* Set the first sequence ($a) and reset any internal caches to indicate that
* when calling the calculation methods, we need to recalculate them.
* @param string|array $a The sequence to set as the first sequence.
public function setSeq1($a)
$this->matchingBlocks = null;
* Set the second sequence ($b) and reset any internal caches to indicate that
* when calling the calculation methods, we need to recalculate them.
* @param string|array $b The sequence to set as the second sequence.
public function setSeq2($b)
$this->matchingBlocks = null;
$this->fullBCount = null;
* Generate the internal arrays containing the list of junk and non-junk
* characters for the second ($b) sequence.
private function chainB()
$length = count ($this->b);
for($i = 0; $i < $length; ++$i) {
if(isset($this->b2j[$char])) {
if($length >= 200 && count($this->b2j[$char]) * 100 > $length) {
unset($this->b2j[$char]);
$this->b2j[$char][] = $i;
$this->b2j[$char] = array(
foreach(array_keys($popularDict) as $char) {
unset($this->b2j[$char]);
$this->junkDict = array();
if(is_callable($this->junkCallback)) {
foreach(array_keys($popularDict) as $char) {
if(call_user_func($this->junkCallback, $char)) {
$this->junkDict[$char] = 1;
unset($popularDict[$char]);
foreach(array_keys($this->b2j) as $char) {
if(call_user_func($this->junkCallback, $char)) {
$this->junkDict[$char] = 1;
unset($this->b2j[$char]);
* Checks if a particular character is in the junk dictionary
* for the list of junk characters.
* @return boolean $b True if the character is considered junk. False if not.
private function isBJunk($b)
if(isset($this->juncDict[$b])) {
* Find the longest matching block in the two sequences, as defined by the
* lower and upper constraints for each sequence. (for the first sequence,
* $alo - $ahi and for the second sequence, $blo - $bhi)
* Essentially, of all of the maximal matching blocks, return the one that
* startest earliest in $a, and all of those maximal matching blocks that
* start earliest in $a, return the one that starts earliest in $b.
* If the junk callback is defined, do the above but with the restriction
* that the junk element appears in the block. Extend it as far as possible
* by matching only junk elements in both $a and $b.
* @param int $alo The lower constraint for the first sequence.
* @param int $ahi The upper constraint for the first sequence.
* @param int $blo The lower constraint for the second sequence.
* @param int $bhi The upper constraint for the second sequence.
* @return array Array containing the longest match that includes the starting position in $a, start in $b and the length/size.
public function findLongestMatch($alo, $ahi, $blo, $bhi)
for($i = $alo; $i < $ahi; ++$i) {
$jDict = $this->arrayGetDefault($this->b2j, $a[$i], $nothing);
foreach($jDict as $jKey => $j) {
$k = $this->arrayGetDefault($j2Len, $j -1, 0) + 1;
while($bestI > $alo && $bestJ > $blo && !$this->isBJunk($b[$bestJ - 1]) &&
!$this->linesAreDifferent($bestI - 1, $bestJ - 1)) {
while($bestI + $bestSize < $ahi && ($bestJ + $bestSize) < $bhi &&
!$this->isBJunk($b[$bestJ + $bestSize]) && !$this->linesAreDifferent($bestI + $bestSize, $bestJ + $bestSize)) {
while($bestI > $alo && $bestJ > $blo && $this->isBJunk($b[$bestJ - 1]) &&
!$this->isLineDifferent($bestI - 1, $bestJ - 1)) {
while($bestI + $bestSize < $ahi && $bestJ + $bestSize < $bhi &&
$this->isBJunk($b[$bestJ + $bestSize]) && !$this->linesAreDifferent($bestI + $bestSize, $bestJ + $bestSize)) {
* Check if the two lines at the given indexes are different or not.
* @param int $aIndex Line number to check against in a.
* @param int $bIndex Line number to check against in b.
* @return boolean True if the lines are different and false if not.
public function linesAreDifferent($aIndex, $bIndex)
$lineA = $this->a[$aIndex];
$lineB = $this->b[$bIndex];
if($this->options['ignoreWhitespace']) {
$replace = array("\t", ' ');
$lineA = str_replace($replace, '', $lineA);
$lineB = str_replace($replace, '', $lineB);
if($this->options['ignoreCase']) {
$lineA = strtolower($lineA);
$lineB = strtolower($lineB);
* Return a nested set of arrays for all of the matching sub-sequences
* in the strings $a and $b.
* Each block contains the lower constraint of the block in $a, the lower
* constraint of the block in $b and finally the number of lines that the
* @return array Nested array of the matching blocks, as described by the function.
public function getMatchingBlocks()
if(!empty($this->matchingBlocks)) {
return $this->matchingBlocks;
$aLength = count($this->a);
$bLength = count($this->b);
$matchingBlocks = array();
list($alo, $ahi, $blo, $bhi) = array_pop($queue);
$x = $this->findLongestMatch($alo, $ahi, $blo, $bhi);
if($alo < $i && $blo < $j) {
if($i + $k < $ahi && $j + $k < $bhi) {
usort($matchingBlocks, array($this, 'tupleSort'));
foreach($matchingBlocks as $block) {
list($i2, $j2, $k2) = $block;
if($i1 + $k1 == $i2 && $j1 + $k1 == $j2) {
$this->matchingBlocks = $nonAdjacent;
return $this->matchingBlocks;
* Return a list of all of the opcodes for the differences between the
* The nested array returned contains an array describing the opcode
* 0 - The type of tag (as described below) for the opcode.
* 1 - The beginning line in the first sequence.
* 2 - The end line in the first sequence.
* 3 - The beginning line in the second sequence.
* 4 - The end line in the second sequence.
* The different types of tags include:
* replace - The string from $i1 to $i2 in $a should be replaced by
* the string in $b from $j1 to $j2.
* delete - The string in $a from $i1 to $j2 should be deleted.
* insert - The string in $b from $j1 to $j2 should be inserted at
* equal - The two strings with the specified ranges are equal.
* @return array Array of the opcodes describing the differences between the strings.
public function getOpCodes()
if(!empty($this->opCodes)) {
$this->opCodes = array();
$blocks = $this->getMatchingBlocks();
foreach($blocks as $block) {
list($ai, $bj, $size) = $block;
if($i < $ai && $j < $bj) {
$this->opCodes[] = array(
$this->opCodes[] = array(