How do I split a string with multiple separators in JavaScript? I\'m trying to split on both commas and spaces but, AFAIK, JS\'s split function only supports one separator.
For those of you who want more customization in their splitting function, I wrote a recursive algorithm that splits a given string with a list of characters to split on. I wrote this before I saw the above post. I hope it helps some frustrated programmers.
splitString = function(string, splitters) {
var list = [string];
for(var i=0, len=splitters.length; i<len; i++) {
traverseList(list, splitters[i], 0);
}
return flatten(list);
}
traverseList = function(list, splitter, index) {
if(list[index]) {
if((list.constructor !== String) && (list[index].constructor === String))
(list[index] != list[index].split(splitter)) ? list[index] = list[index].split(splitter) : null;
(list[index].constructor === Array) ? traverseList(list[index], splitter, 0) : null;
(list.constructor === Array) ? traverseList(list, splitter, index+1) : null;
}
}
flatten = function(arr) {
return arr.reduce(function(acc, val) {
return acc.concat(val.constructor === Array ? flatten(val) : val);
},[]);
}
var stringToSplit = "people and_other/things";
var splitList = [" ", "_", "/"];
splitString(stringToSplit, splitList);
Example above returns: ["people", "and", "other", "things"]
Note: flatten
function was taken from Rosetta Code
I use regexp:
str = 'Write a program that extracts from a given text all palindromes, e.g. "ABBA", "lamal", "exe".';
var strNew = str.match(/\w+/g);
// Output: ["Write", "a", "program", "that", "extracts", "from", "a", "given", "text", "all", "palindromes", "e", "g", "ABBA", "lamal", "exe"]
Another simple but effective method is to use split + join repeatedly.
"a=b,c:d".split('=').join(',').split(':').join(',').split(',')
Essentially doing a split followed by a join is like a global replace so this replaces each separator with a comma then once all are replaced it does a final split on comma
The result of the above expression is:
['a', 'b', 'c', 'd']
Expanding on this you could also place it in a function:
function splitMulti(str, tokens){
var tempChar = tokens[0]; // We can use the first token as a temporary join character
for(var i = 1; i < tokens.length; i++){
str = str.split(tokens[i]).join(tempChar);
}
str = str.split(tempChar);
return str;
}
Usage:
splitMulti('a=b,c:d', ['=', ',', ':']) // ["a", "b", "c", "d"]
If you use this functionality a lot it might even be worth considering wrapping String.prototype.split
for convenience (I think my function is fairly safe - the only consideration is the additional overhead of the conditionals (minor) and the fact that it lacks an implementation of the limit argument if an array is passed).
Be sure to include the splitMulti
function if using this approach to the below simply wraps it :). Also worth noting that some people frown on extending built-ins (as many people do it wrong and conflicts can occur) so if in doubt speak to someone more senior before using this or ask on SO :)
var splitOrig = String.prototype.split; // Maintain a reference to inbuilt fn
String.prototype.split = function (){
if(arguments[0].length > 0){
if(Object.prototype.toString.call(arguments[0]) == "[object Array]" ) { // Check if our separator is an array
return splitMulti(this, arguments[0]); // Call splitMulti
}
}
return splitOrig.apply(this, arguments); // Call original split maintaining context
};
Usage:
var a = "a=b,c:d";
a.split(['=', ',', ':']); // ["a", "b", "c", "d"]
// Test to check that the built-in split still works (although our wrapper wouldn't work if it didn't as it depends on it :P)
a.split('='); // ["a", "b,c:d"]
Enjoy!
I think it's easier if you specify what you wanna leave, instead of what you wanna remove.
As if you wanna have only English words, you can use something like this:
text.match(/[a-z'\-]+/gi);
Examples (run snippet):
var R=[/[a-z'\-]+/gi,/[a-z'\-\s]+/gi];
var s=document.getElementById('s');
for(var i=0;i<R.length;i++)
{
var o=document.createElement('option');
o.innerText=R[i]+'';
o.value=i;
s.appendChild(o);
}
var t=document.getElementById('t');
var r=document.getElementById('r');
s.onchange=function()
{
r.innerHTML='';
var x=s.value;
if((x>=0)&&(x<R.length))
x=t.value.match(R[x]);
for(i=0;i<x.length;i++)
{
var li=document.createElement('li');
li.innerText=x[i];
r.appendChild(li);
}
}
<textarea id="t" style="width:70%;height:12em">even, test; spider-man
But saying o'er what I have said before:
My child is yet a stranger in the world;
She hath not seen the change of fourteen years,
Let two more summers wither in their pride,
Ere we may think her ripe to be a bride.
—Shakespeare, William. The Tragedy of Romeo and Juliet</textarea>
<p><select id="s">
<option selected>Select a regular expression</option>
<!-- option value="1">/[a-z'\-]+/gi</option>
<option value="2">/[a-z'\-\s]+/gi</option -->
</select></p>
<ol id="r" style="display:block;width:auto;border:1px inner;overflow:scroll;height:8em;max-height:10em;"></ol>
</div>
Tricky method:
var s = "dasdnk asd, (naks) :d skldma";
var a = s.replace('(',' ').replace(')',' ').replace(',',' ').split(' ');
console.log(a);//["dasdnk", "asd", "naks", ":d", "skldma"]
Splitting URL by .com/ or .net/
url.split(/\.com\/|\.net\//)