Consider the following array:
/www/htdocs/1/sites/lib/abcdedd
/www/htdocs/1/sites/conf/xyz
/www/htdocs/1/sites/conf/abc/
Perhaps porting the algorithm that Python's os.path.commonprefix(m)
uses would work?
def commonprefix(m):
    """Return the longest common leading string of all entries in ``m``.

    The comparison is character by character, not path-component by
    path-component, so the result may end in the middle of a directory
    name. An empty list yields ''.
    """
    if not m:
        return ''
    # The lexicographically smallest and largest entries differ at least as
    # early as any other pair, so comparing just these two is sufficient.
    s1 = min(m)
    s2 = max(m)
    n = min(len(s1), len(s2))
    # range() instead of xrange() so the function also runs on Python 3.
    for i in range(n):
        if s1[i] != s2[i]:
            return s1[:i]
    return s1[:n]
That is, uh... something like
/**
 * Return the longest common leading string of all entries in $m.
 *
 * The comparison is byte by byte, not per path component, so the result
 * may end in the middle of a directory name.
 *
 * @param array $m list of strings (e.g. path names)
 * @return string the shared prefix, or "" when $m is empty or nothing is shared
 */
function commonprefix($m) {
    if (!$m) return "";
    // Find the lexicographically smallest and largest entries with strcmp().
    // min()/max() must not be used here: PHP compares numeric strings as
    // numbers ("9" < "10"), which breaks the "only the two extremes matter"
    // argument this algorithm relies on.
    $s1 = null;
    $s2 = null;
    foreach ($m as $s) {
        $s = (string) $s;
        if ($s1 === null || strcmp($s, $s1) < 0) $s1 = $s;
        if ($s2 === null || strcmp($s, $s2) > 0) $s2 = $s;
    }
    $n = min(strlen($s1), strlen($s2));
    for ($i = 0; $i < $n; $i++) {
        // The first differing byte marks the end of the common prefix.
        if ($s1[$i] !== $s2[$i]) return substr($s1, 0, $i);
    }
    return substr($s1, 0, $n);
}
After that you can just substr each element of the original list with the length of the common prefix as the start offset.
Probably too naive and noobish but it works. I have used this algorithm:
<?php
/**
 * Find the longest common substring(s) of two strings.
 *
 * Every occurrence that reaches the maximum length is reported, in the order
 * its last character is met while scanning $str1 (outer) and $str2 (inner).
 *
 * @param string $str1
 * @param string $str2
 * @return array list of longest common substrings; empty if either input is empty
 *               or the strings share no character
 */
function strlcs($str1, $str2){
    $lenA = strlen($str1);
    $lenB = strlen($str2);
    $matches = array();
    if ($lenA == 0 || $lenB == 0) {
        return $matches; // nothing to compare
    }
    // $table[$a][$b] holds the length of the common run ending at
    // $str1[$a] / $str2[$b]; zero means "no match at this pair".
    $table = array_fill(0, $lenA, array_fill(0, $lenB, 0));
    $best = 0;
    for ($a = 0; $a < $lenA; $a++) {
        for ($b = 0; $b < $lenB; $b++) {
            if ($str1[$a] != $str2[$b]) {
                continue; // cell keeps its initial 0
            }
            // A match extends the run on the diagonal, or starts a new one
            // when either index sits on the first row/column.
            $run = ($a == 0 || $b == 0) ? 1 : $table[$a - 1][$b - 1] + 1;
            $table[$a][$b] = $run;
            if ($run > $best) {
                // New record: earlier, shorter results are obsolete.
                $best = $run;
                $matches = array();
            }
            if ($run == $best) {
                $matches[] = substr($str1, $a - $best + 1, $best);
            }
        }
    }
    return $matches;
}
$arr = array(
'/www/htdocs/1/sites/lib/abcdedd',
'/www/htdocs/1/sites/conf/xyz',
'/www/htdocs/1/sites/conf/abc/def',
'/www/htdocs/1/sites/htdocs/xyz',
'/www/htdocs/1/sites/lib2/abcdedd'
);
// find the common substring
// NOTE: only the first two entries are compared; this assumes their longest
// common substring is also the leading prefix shared by every other entry.
$longestCommonSubstring = strlcs( $arr[0], $arr[1] );
// remove the common substring, but only where it is a leading prefix:
// str_replace() would also delete later occurrences inside a path.
// The guard avoids reading index 0 when strlcs() found nothing in common.
if ($longestCommonSubstring) {
    $prefix = $longestCommonSubstring[0];
    $prefixLen = strlen($prefix);
    foreach ($arr as $k => $v) {
        if (strncmp($v, $prefix, $prefixLen) === 0) {
            // Cast keeps the result a string on PHP < 8, where substr() returns
            // false when the path equals the prefix exactly.
            $arr[$k] = (string) substr($v, $prefixLen);
        }
    }
}
var_dump($arr);
Output:
array(5) {
[0]=>
string(11) "lib/abcdedd"
[1]=>
string(8) "conf/xyz"
[2]=>
string(12) "conf/abc/def"
[3]=>
string(10) "htdocs/xyz"
[4]=>
string(12) "lib2/abcdedd"
}
:)
Write a function longest_common_prefix
that takes two strings as input. Then apply it to the strings in any order to reduce them to their common prefix. Since it is associative and commutative, the order doesn't matter for the result.
This is the same as for other binary operations like for example addition or greatest common divisor.
// Sample input: absolute paths that all share the "/www/htdocs/1/sites/" prefix.
$values = array(
    '/www/htdocs/1/sites/lib/abcdedd',
    '/www/htdocs/1/sites/conf/xyz',
    '/www/htdocs/1/sites/conf/abc/def',
    '/www/htdocs/1/sites/htdocs/xyz',
    '/www/htdocs/1/sites/lib2/abcdedd'
);
/**
 * Break a slash-delimited path into its segments.
 *
 * A leading "/" produces an empty first segment.
 *
 * @param string $r path to split
 * @return array list of path segments
 */
function splitArrayValues($r) {
    $segments = explode('/', $r);
    return $segments;
}
/**
 * Remove the leading path segments shared by every entry of $values.
 *
 * Paths are compared segment by segment (split on "/"), so a directory name
 * is never cut in half.
 *
 * @param array $values list of slash-delimited paths
 * @return array the paths with the common leading segments removed, re-indexed from 0
 */
function stripCommon($values) {
    if (!$values) {
        // Nothing to compare; avoids reading the first element of an empty list.
        return array();
    }
    $testValues = array();
    foreach ($values as $path) {
        $testValues[] = explode('/', $path);
    }
    // Count how many leading segments of the first path occur, at the same
    // position, in every path.
    $i = 0;
    foreach($testValues[0] as $key => $value) {
        foreach($testValues as $arraySetValues) {
            // A shorter path has no segment here, which ends the common part.
            // Strict comparison: with "!=" the segments "1" and "01" would be
            // compared as numbers and wrongly treated as identical.
            if (!array_key_exists($key, $arraySetValues) || $arraySetValues[$key] !== $value) break 2;
        }
        $i++;
    }
    $returnArray = array();
    foreach($testValues as $value) {
        $returnArray[] = implode('/',array_slice($value,$i));
    }
    return $returnArray;
}
// Strip the shared leading segments, then dump the result wrapped in <pre>
// so the var_dump() layout stays readable when viewed in a browser.
$newValues = stripCommon($values);
echo '<pre>';
var_dump($newValues);
echo '</pre>';
EDIT Variant of my original method using an array_walk to rebuild the array
// Sample input: absolute paths that all share the "/www/htdocs/1/sites/" prefix.
$values = array(
    '/www/htdocs/1/sites/lib/abcdedd',
    '/www/htdocs/1/sites/conf/xyz',
    '/www/htdocs/1/sites/conf/abc/def',
    '/www/htdocs/1/sites/htdocs/xyz',
    '/www/htdocs/1/sites/lib2/abcdedd'
);
/**
 * Break a slash-delimited path into its segments.
 *
 * A leading "/" produces an empty first segment.
 *
 * @param string $r path to split
 * @return array list of path segments
 */
function splitArrayValues($r) {
    $segments = explode('/', $r);
    return $segments;
}
/**
 * array_walk() callback: replace a segment list, in place, by the path made
 * of its segments from offset $i onwards.
 *
 * @param array $r segment list; overwritten with the re-joined string
 * @param mixed $d array key supplied by array_walk(); not used
 * @param int   $i number of leading segments to drop
 */
function rejoinArrayValues(&$r,$d,$i) {
    $remaining = array_slice($r, $i);
    $r = implode('/', $remaining);
}
/**
 * Remove the leading path segments shared by every entry of $values.
 *
 * Variant that rebuilds the paths in place with array_walk(), so the keys
 * of $values are preserved in the result.
 *
 * @param array $values slash-delimited paths
 * @return array the paths with the common leading segments removed
 */
function stripCommon($values) {
    if (!$values) {
        // Nothing to compare; avoids reading the first element of an empty list.
        return array();
    }
    $testValues = array_map('splitArrayValues',$values);
    // Count how many leading segments of the first path occur, at the same
    // position, in every path. reset() is used instead of index 0 so that
    // input arrays with other keys work too.
    $i = 0;
    foreach(reset($testValues) as $key => $value) {
        foreach($testValues as $arraySetValues) {
            // A shorter path has no segment here, which ends the common part.
            // Strict comparison: with "!=" the segments "1" and "01" would be
            // compared as numbers and wrongly treated as identical.
            if (!array_key_exists($key, $arraySetValues) || $arraySetValues[$key] !== $value) break 2;
        }
        $i++;
    }
    // Turn every segment list back into a string, dropping the first $i segments.
    array_walk($testValues, 'rejoinArrayValues', $i);
    return $testValues;
}
// Strip the shared leading segments, then dump the result wrapped in <pre>
// so the var_dump() layout stays readable when viewed in a browser.
$newValues = stripCommon($values);
echo '<pre>';
var_dump($newValues);
echo '</pre>';
EDIT
The most efficient and elegant answer is likely to involve taking functions and methods from each of the provided answers
Load them into a trie data structure. Starting from the root node, walk down until you reach a node that has more than one child. Once you find that magic node, just discard the chain of parent nodes above it and use the current node as the new root.
The problem can be simplified if just viewed from the string comparison angle. This is probably faster than array-splitting:
// Shrink $longest, one trailing path segment at a time, until "$longest/"
// is a prefix of every entry in $tetris.
$longest = $tetris[0]; # or array_pop()
foreach ($tetris as $cmp) {
    // "." is PHP's concatenation operator; "+" would attempt arithmetic on
    // the two strings. The appended "/" makes sure only whole directory
    // names match. The emptiness check stops the loop once nothing is left
    // to trim (otherwise a $cmp without a leading "/" would loop forever).
    while ($longest !== "" && strncmp($longest . "/", $cmp, strlen($longest) + 1) !== 0) {
        $slash = strrpos($longest, "/");
        // No slash left means there is no common directory at all.
        $longest = ($slash === false) ? "" : substr($longest, 0, $slash);
    }
}