data = {\'A\': [\'word1 other stuff\', \'otherstuff word1\', \'hello word3 bye\'], \'B\': [\'foo word1\', \'word2 hello\', \'word2 bye\'] } df = pd.DataFrame (d