第一种:针对单个单词进行词性标注
import java.io.IOException;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
/**
 * Minimal demo: loads a Stanford POS tagger model, tags one string, and prints
 * whatever {@code MaxentTagger.tagString} returns.
 *
 * <p>Usage: {@code java tagger [modelFile [textToTag]]}. Both arguments are
 * optional; when omitted, the built-in model path and sample text are used, so
 * running with no arguments behaves as before.
 */
public class tagger
{
    /** Model file used when no model path is supplied on the command line. */
    private static final String DEFAULT_MODEL_PATH =
        "F:/stanford-postagger-2014-06-16/models/english-left3words-distsim.tagger";

    /** Text that is tagged when no text is supplied on the command line. */
    private static final String DEFAULT_SAMPLE = "text";

    /**
     * Tags a single string and writes the tagged form to standard output.
     *
     * @param args optional: {@code args[0]} is the model file, {@code args[1]} is the text to tag
     */
    public static void main(String[] args) throws IOException,ClassNotFoundException
    {
        // Fall back to the original hard-coded values so the no-argument run is unchanged.
        String modelPath = args.length > 0 ? args[0] : DEFAULT_MODEL_PATH;
        String sample = args.length > 1 ? args[1] : DEFAULT_SAMPLE;

        // Named posTagger (not "tagger") to avoid shadowing the enclosing class name.
        MaxentTagger posTagger = new MaxentTagger(modelPath);

        String tagged = posTagger.tagString(sample);
        System.out.println(tagged);
    }
}
第二种:读取文本文件,用 MaxentTagger.tokenizeText 分句后逐句进行词性标注
/**
 * Demo that tokenizes a text file into sentences with
 * {@code MaxentTagger.tokenizeText} and prints each tagged sentence on its own line.
 *
 * <p>Usage: {@code java TaggerDemo modelFile fileToTag}
 */
class TaggerDemo {
    /** Not instantiable; this class only hosts {@link #main}. */
    private TaggerDemo() {}

    /**
     * Tags every sentence of the given file and prints the results to standard output.
     *
     * @param args {@code args[0]} is the tagger model file, {@code args[1]} is the file to tag
     * @throws Exception if the input file cannot be opened or read, or tagging fails
     */
    public static void main(String[] args) throws Exception
    {
        if (args.length != 2)
        {
            System.err.println("usage: java TaggerDemo modelFile fileToTag");
            return;
        }

        // Use the arguments the usage message promises instead of machine-specific
        // literals (the old "F:\trigger.txt" literal also contained a \t TAB escape).
        MaxentTagger tagger = new MaxentTagger(args[0]);

        BufferedReader reader = new BufferedReader(new FileReader(args[1]));
        try
        {
            List<List<HasWord>> sentences = MaxentTagger.tokenizeText(reader);
            for (List<HasWord> sentence : sentences)
            {
                List<TaggedWord> tSentence = tagger.tagSentence(sentence);
                System.out.println(Sentence.listToString(tSentence, false));
            }
        }
        finally
        {
            // Release the file handle even when tokenizing or tagging throws.
            reader.close();
        }
    }
}
第三种:读取文本文件,对文件进行词性标注
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.List;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
/** This demo shows user-provided sentences (i.e., {@code List<HasWord>})
 * being tagged by the tagger. The sentences are generated by direct use
 * of the DocumentPreprocessor class.
 *
 * <p>Usage: {@code java TaggerDemo2 modelFile fileToTag}. The input file is
 * read as UTF-8 and the output is written to standard output as UTF-8.
 *
 * @author Christopher Manning
 */
class TaggerDemo2 {
    /** Not instantiable; this class only hosts {@link #main}. */
    private TaggerDemo2() {}

    /**
     * Tags each sentence of the given file, then tags one fixed example sentence
     * and prints the words whose tag starts with {@code "JJ"}.
     *
     * @param args {@code args[0]} is the tagger model file, {@code args[1]} is the file to tag
     * @throws Exception if the input file cannot be opened or read, or tagging fails
     */
    public static void main(String[] args) throws Exception
    {
        if (args.length != 2)
        {
            System.err.println("usage: java TaggerDemo2 modelFile fileToTag");
            return;
        }

        MaxentTagger tagger = new MaxentTagger(args[0]);
        TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
            "untokenizable=noneKeep");

        BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
        try
        {
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
            try
            {
                DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
                documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
                for (List<HasWord> sentence : documentPreprocessor)
                {
                    List<TaggedWord> tSentence = tagger.tagSentence(sentence);
                    pw.println(Sentence.listToString(tSentence, false));
                }

                // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
                List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
                List<TaggedWord> taggedSent = tagger.tagSentence(sent);
                for (TaggedWord tw : taggedSent)
                {
                    if (tw.tag().startsWith("JJ"))
                    {
                        pw.println(tw.word());
                    }
                }
            }
            finally
            {
                // Closing also flushes, so output buffered before a failure is not lost.
                pw.close();
            }
        }
        finally
        {
            // Release the input file even when preprocessing or tagging throws.
            r.close();
        }
    }
}
来源:CSDN
作者:小妖精Fsky
链接:https://blog.csdn.net/u014221266/article/details/44646097