I have a large block of text, and I would like to find out which words are used most often (excluding a few common ones, like "the", "a", "and", etc.).
How would I go about doing this?
You should split the string into words, then loop through the words and increment a counter for each one:
// Tally how often each word occurs in `str`; keys are the words prefixed with "_".
const wordCounts = { };
// Extract runs of word characters. Splitting on /\b/ instead would also return
// the whitespace and punctuation between words, which would then be counted.
// match() returns null when there is no match at all, hence the `|| []`.
const words = str.match(/\w+/g) || [];
for (let i = 0; i < words.length; i++) {
    wordCounts["_" + words[i]] = (wordCounts["_" + words[i]] || 0) + 1;
}
The "_" + prefix allows it to process words like "constructor" that are already properties of the object.
You may want to write words[i].toLowerCase()
to count case-insensitively.
With the following function you can get a list of the most frequent words. It returns an array, which contains more than one word when several words tie for the highest count.
/**
 * Finds the word(s) that occur most often in a whitespace-separated string.
 *
 * Words are compared case-sensitively and exactly as they appear between
 * whitespace (punctuation attached to a word is part of that word).
 *
 * @param {string} string - The text to analyse.
 * @returns {string[]} Every word that shares the highest occurrence count, in
 *   order of first appearance; an empty array if the text contains no words.
 */
const findMostFrequentWords = (string) => {
    // A Map keeps arbitrary words (e.g. "length", "constructor", "42") from
    // colliding with built-in properties or being treated as array indices.
    const wordOccurrences = new Map();
    for (const word of string.split(/\s+/)) {
        // Leading/trailing whitespace produces empty tokens; they are not words.
        if (word === "") {
            continue;
        }
        wordOccurrences.set(word, (wordOccurrences.get(word) || 0) + 1);
    }

    // Highest count seen; stays 0 when there are no words at all.
    let maximum = 0;
    for (const count of wordOccurrences.values()) {
        if (count > maximum) {
            maximum = count;
        }
    }

    // Collect every word tied for the highest count.
    const result = [];
    for (const [word, count] of wordOccurrences) {
        if (count === maximum) {
            result.push(word);
        }
    }
    return result;
};