I’m using this character, double sharp \'
String.prototype.codes = function() { return [...this].length };
String.prototype.chars = function() {
let GraphemeSplitter = require('grapheme-splitter');
return (new GraphemeSplitter()).countGraphemes(this);
}
console.log("F
Javascript (and Java) strings use UTF-16 encoding.
Unicode codepoint U+0046 (F
) is encoded in UTF-16 using 1 codeunit: 0x0046
Unicode codepoint U+1D12A (
That's the function I wrote to get string length in codepoint length
function nbUnicodeLength(string){
var stringIndex = 0;
var unicodeIndex = 0;
var length = string.length;
var second;
var first;
while (stringIndex < length) {
first = string.charCodeAt(stringIndex); // returns an integer between 0 and 65535 representing the UTF-16 code unit at the given index.
if (first >= 0xD800 && first <= 0xDBFF && string.length > stringIndex + 1) {
second = string.charCodeAt(stringIndex + 1);
if (second >= 0xDC00 && second <= 0xDFFF) {
stringIndex += 2;
} else {
stringIndex += 1;
}
} else {
stringIndex += 1;
}
unicodeIndex += 1;
}
return unicodeIndex;
}
To sumarize my comments:
That's just the lenght of that string.
Some chars involve other chars as well, even if it looks like a single character. "̉mủt̉ả̉̉̉t̉ẻd̉W̉ỏ̉r̉̉d̉̉".length == 24
From this (great) blog post, they have a function that will return correct length:
function fancyCount(str){
const joiner = "\u{200D}";
const split = str.split(joiner);
let count = 0;
for(const s of split){
//removing the variation selectors
const num = Array.from(s.split(/[\ufe00-\ufe0f]/).join("")).length;
count += num;
}
//assuming the joiners are used appropriately
return count / split.length;
}
console.log(fancyCount("F