input char:a (unicode:97) output type:2
input char:Space (unicode:32) output type:12
in java i can use code: \"int type = Character.getType(unico
XRegExp has a plugin that adds support for Unicode categories: http://xregexp.com/plugins/.
Using that, you could write a function that tests the character against each category in turn, like this:
// Two-letter Unicode category abbreviations that getType() tries, in order.
// They are grouped here by their leading letter; the order of entries matters
// because getType() returns the first one that matches.
var types = [
    // L*
    'Ll', 'Lu', 'Lt', 'Lm', 'Lo',
    // M*
    'Mn', 'Mc', 'Me',
    // N*
    'Nd', 'Nl', 'No',
    // P*
    'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Pc', 'Po',
    // S*
    'Sm', 'Sc', 'Sk', 'So',
    // Z*
    'Zs', 'Zl', 'Zp',
    // C*
    'Cc', 'Cf', 'Co', 'Cs', 'Cn'
];
/**
 * Finds the Unicode category of a character.
 *
 * The argument is coerced to a string and only its first UTF-16 code unit
 * (charAt(0)) is examined. Each entry of `types` is tried in order by
 * building an XRegExp `\p{..}` pattern for it; the first entry whose
 * pattern matches is returned.
 *
 * @param {*} char - Value whose first character should be classified.
 * @returns {string|undefined} The matching category abbreviation (e.g. "Zs"),
 *     or undefined when no category matches (for instance an empty string).
 */
function getType(char) {
    // Keep the parameter untouched and work on a separate local instead.
    var firstUnit = (char + "").charAt(0);
    var index = 0;
    var category;

    while (index < types.length) {
        category = types[index];
        if (XRegExp("\\p{" + category + "}").test(firstUnit)) {
            return category;
        }
        index += 1;
    }

    // No category matched.
    return undefined;
}
// Example usage: classify a single space character and show the result in an alert dialog.
alert(getType(" ")); // alerts Zs, because " " is a space separator character
http://jsfiddle.net/pimvdb/mYfCZ/1/