I have this example and I want to know how to get this result. I have a text that I tokenize, and then I want to collect its bigrams, trigrams, and four-grams, like this:
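Try NLTK's `everygrams`, which returns every n-gram of a sequence whose length lies between the given minimum and maximum (here 1 and 5):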
from nltk import everygrams
list(everygrams('hello', 1, 5))
[('h',),
('e',),
('l',),
('l',),
('o',),
('h', 'e'),
('e', 'l'),
('l', 'l'),
('l', 'o'),
('h', 'e', 'l'),
('e', 'l', 'l'),
('l', 'l', 'o'),
('h', 'e', 'l', 'l'),
('e', 'l', 'l', 'o'),
('h', 'e', 'l', 'l', 'o')]
The same call works on word tokens:
from nltk import everygrams
list(everygrams('hello word is a fun program'.split(), 1, 5))
[('hello',),
('word',),
('is',),
('a',),
('fun',),
('program',),
('hello', 'word'),
('word', 'is'),
('is', 'a'),
('a', 'fun'),
('fun', 'program'),
('hello', 'word', 'is'),
('word', 'is', 'a'),
('is', 'a', 'fun'),
('a', 'fun', 'program'),
('hello', 'word', 'is', 'a'),
('word', 'is', 'a', 'fun'),
('is', 'a', 'fun', 'program'),
('hello', 'word', 'is', 'a', 'fun'),
('word', 'is', 'a', 'fun', 'program')]