Multi-term named entities in Stanford Named Entity Recognizer

前端 未结 8 1351
春和景丽
春和景丽 2021-01-31 19:33

I'm using the Stanford Named Entity Recognizer http://nlp.stanford.edu/software/CRF-NER.shtml and it's working fine. This is

    List&         


        
8条回答
  •  天涯浪人
    2021-01-31 20:04

    Here is my full code. I use Stanford CoreNLP and wrote an algorithm to concatenate multi-term names.

    import edu.stanford.nlp.ling.CoreAnnotations;
    import edu.stanford.nlp.ling.CoreLabel;
    import edu.stanford.nlp.pipeline.Annotation;
    import edu.stanford.nlp.pipeline.StanfordCoreNLP;
    import edu.stanford.nlp.util.CoreMap;
    import org.apache.log4j.Logger;
    
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Properties;
    
    /**
     * Created by Chanuka on 8/28/14 AD.
     *
     * <p>Runs a Stanford CoreNLP pipeline (tokenize, ssplit, pos, lemma, ner) over a piece
     * of text and collects the tokens carrying a requested named-entity tag, joining
     * adjacent matching tokens into a single multi-term name.
     */
    public class FindNameEntityTypeExecutor {

        private static final Logger logger = Logger.getLogger(FindNameEntityTypeExecutor.class);

        /** Pipeline built once in the constructor and reused for every call. */
        private final StanfordCoreNLP pipeline;

        /**
         * Builds the annotator pipeline with the
         * {@code tokenize, ssplit, pos, lemma, ner} annotators.
         */
        public FindNameEntityTypeExecutor() {
            logger.info("Initializing Annotator pipeline ...");

            Properties props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");

            pipeline = new StanfordCoreNLP(props);

            logger.info("Annotator pipeline initialized");
        }

        /**
         * Finds every run of consecutive tokens in {@code text} whose named-entity tag
         * equals {@code entity}.
         *
         * <p>Tokens of one run are joined with a single space, so two adjacent tokens
         * tagged with the requested entity yield one entry rather than two. A run never
         * spans a sentence boundary because the run state is reset for each sentence.
         *
         * @param text   the text to annotate
         * @param entity the named-entity tag to match, compared with {@code equals}
         *               against each token's {@code NamedEntityTagAnnotation}
         * @return the matched names in order of appearance; empty when nothing matches
         */
        List<String> findNameEntityType(String text, String entity) {
            logger.info("Finding entity type matches in the " + text + " for entity type, " + entity);

            // create an empty Annotation just with the given text
            Annotation document = new Annotation(text);

            // run all Annotators on this text
            pipeline.annotate(document);
            List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
            List<String> matches = new ArrayList<>();

            for (CoreMap sentence : sentences) {
                // true while the token just before the current one carried the requested tag
                boolean previousTokenMatched = false;

                // a CoreLabel is a CoreMap with additional token-specific methods
                for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                    String word = token.get(CoreAnnotations.TextAnnotation.class);
                    boolean matched =
                            entity.equals(token.get(CoreAnnotations.NamedEntityTagAnnotation.class));

                    if (matched) {
                        if (previousTokenMatched) {
                            // continue the current run: extend the most recent entry in place
                            int lastIndex = matches.size() - 1;
                            matches.set(lastIndex, matches.get(lastIndex) + " " + word);
                        } else {
                            // first token of a new run
                            matches.add(word);
                        }
                    }
                    previousTokenMatched = matched;
                }
            }
            return matches;
        }
    }
    

提交回复
热议问题