I have this example and I want to know how to get this result. I have some text; I tokenize it and then collect the bigrams, trigrams, and four-grams, like this:
im
Try `everygrams`:
from nltk import everygrams
list(everygrams('hello', 1, 5))
[out]:
[('h',),
('e',),
('l',),
('l',),
('o',),
('h', 'e'),
('e', 'l'),
('l', 'l'),
('l', 'o'),
('h', 'e', 'l'),
('e', 'l', 'l'),
('l', 'l', 'o'),
('h', 'e', 'l', 'l'),
('e', 'l', 'l', 'o'),
('h', 'e', 'l', 'l', 'o')]
The same works with word tokens:
from nltk import everygrams
list(everygrams('hello word is a fun program'.split(), 1, 5))
[out]:
[('hello',),
('word',),
('is',),
('a',),
('fun',),
('program',),
('hello', 'word'),
('word', 'is'),
('is', 'a'),
('a', 'fun'),
('fun', 'program'),
('hello', 'word', 'is'),
('word', 'is', 'a'),
('is', 'a', 'fun'),
('a', 'fun', 'program'),
('hello', 'word', 'is', 'a'),
('word', 'is', 'a', 'fun'),
('is', 'a', 'fun', 'program'),
('hello', 'word', 'is', 'a', 'fun'),
('word', 'is', 'a', 'fun', 'program')]