I am trying to replace in a string all non word characters with empty string expect for spaces and the put together all multiple spaces as one single space.
Followin
function create_slug_html($string, $ext='.html'){
$replace = '-';
$string=strtolower($string);
$string=trim($string);
mb_regex_encoding("UTF-8");
//regex could also be \W
$string= mb_ereg_replace('[^\w]', ' ', $string);
$string= mb_ereg_replace('\s+', ' ', $string);
//remove query string
if(preg_match("#^http(s)?://[a-z0-9-_.]+\.[a-z]{2,4}#i",$string)){
$parsed_url = parse_url($string);
$string = $parsed_url['host'].' '.$parsed_url['path'];
//if want to add scheme eg. http, https than uncomment next line
//$string = $parsed_url['scheme'].' '.$string;
}
//replace / and . with white space
$string = preg_replace("/[\/\.]/", " ", $string);
// $string = preg_replace("/[^a-z0-9_\s-]/", "", $string);
//remove multiple dashes or whitespaces
$string = preg_replace("/[\s-]+/", " ", $string);
//convert whitespaces and underscore to $replace
$string = preg_replace("/[\s_]/", $replace, $string);
//limit the slug size
$string = substr($string, 0, 200);
//slug is generated
return ($ext) ? $string.$ext : $string;
}
please check is it ok and support english and unicode