斯坦福词性标注Demo

蹲街弑〆低调 提交于 2019-11-29 18:55:23

第一种:针对单个单词进行词性标注

import java.io.IOException;
import edu.stanford.nlp.tagger.maxent.MaxentTagger; 

/**
 * Minimal part-of-speech tagging demo: loads a tagger model from a
 * hard-coded path, tags one hard-coded string and prints the tagged
 * result to standard output.
 */
public class tagger
{
	/**
	 * Runs the demo. The command-line arguments are not read.
	 *
	 * @param args unused
	 * @throws IOException declared because the tagger may raise it (presumably while loading the model)
	 * @throws ClassNotFoundException declared because the tagger may raise it (presumably while loading the model)
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException
	{
		// Location of the model file used by this demo
		final String modelPath = "F:/stanford-postagger-2014-06-16/models/english-left3words-distsim.tagger";
		// The text to be tagged
		final String input = "text";

		// Build the tagger from the model file
		MaxentTagger posTagger = new MaxentTagger(modelPath);

		// Tag the input and print the tagged string
		System.out.println(posTagger.tagString(input));
	}

}

第二种:读取文本文件,用 MaxentTagger.tokenizeText 分句后逐句进行词性标注

/**
 * Tags every sentence of a text file and prints the tagged sentences to
 * standard output, one sentence per line.
 *
 * <p>Usage: {@code java TaggerDemo modelFile fileToTag}
 */
class TaggerDemo {

	private TaggerDemo() {}

	/**
	 * Loads the tagger model named by {@code args[0]}, splits the file named by
	 * {@code args[1]} into sentences, tags each sentence and prints it.
	 *
	 * <p>If the argument count is not exactly two, a usage line is written to
	 * standard error and nothing else happens.
	 *
	 * @param args the model file path followed by the path of the file to tag
	 * @throws Exception propagated from loading the model or reading the input file
	 */
	public static void main(String[] args) throws Exception
	{
		if (args.length != 2)
		{
			System.err.println("usage: java TaggerDemo modelFile fileToTag");

			return;
		}

		// Honor the paths given on the command line, as the usage message promises.
		// The previous hard-coded "F:\trigger.txt" also contained the escape \t
		// (a TAB character), so it never named the intended file.
		MaxentTagger tagger = new MaxentTagger(args[0]);

		List<List<HasWord>> sentences;
		BufferedReader reader = new BufferedReader(new FileReader(args[1]));
		try
		{
			// The sentences come back as a list, so the reader is no longer
			// needed once this call returns.
			sentences = MaxentTagger.tokenizeText(reader);
		}
		finally
		{
			// Release the file handle even if tokenizing fails.
			reader.close();
		}

		for (List<HasWord> sentence : sentences)
		{
			List<TaggedWord> tSentence = tagger.tagSentence(sentence);

			System.out.println(Sentence.listToString(tSentence, false));
		}
	}

}
第三种:读取文本文件,对文件进行词性标注


import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.List;

import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

/** This demo shows user-provided sentences (i.e., {@code List<HasWord>})
 *  being tagged by the tagger. The sentences are generated by direct use
 *  of the DocumentPreprocessor class.
 *
 *  <p>Usage: {@code java TaggerDemo2 modelFile fileToTag}
 *
 *  @author Christopher Manning
 */
class TaggerDemo2 {

	private TaggerDemo2() {}

	/**
	 * Tags every sentence of the file named by {@code args[1]} with the model
	 * named by {@code args[0]}, printing each tagged sentence to standard output,
	 * then tags one fixed example sentence and prints its adjectives.
	 *
	 * <p>The input file is read as UTF-8 and the output is written as UTF-8.
	 * If the argument count is not exactly two, a usage line is written to
	 * standard error and nothing else happens.
	 *
	 * @param args the model file path followed by the path of the file to tag
	 * @throws Exception propagated from loading the model or reading the input file
	 */
	public static void main(String[] args) throws Exception
	{

		if (args.length != 2)
		{
			System.err.println("usage: java TaggerDemo2 modelFile fileToTag");

			return;
		}

		MaxentTagger tagger = new MaxentTagger(args[0]);

		TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
									   "untokenizable=noneKeep");

		BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));

		try
		{
			PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));

			try
			{
				DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);

				documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);

				for (List<HasWord> sentence : documentPreprocessor)
				{
					List<TaggedWord> tSentence = tagger.tagSentence(sentence);

					pw.println(Sentence.listToString(tSentence, false));
				}

				// print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
				List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");

				List<TaggedWord> taggedSent = tagger.tagSentence(sent);

				for (TaggedWord tw : taggedSent)
				{
					if (tw.tag().startsWith("JJ"))
					{
						pw.println(tw.word());
					}
				}
			}
			finally
			{
				// Closing also flushes, so output produced before a failure is not lost.
				pw.close();
			}
		}
		finally
		{
			// Release the input file handle on every exit path.
			r.close();
		}
	}

}


标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!