Multi-term named entities in Stanford Named Entity Recognizer

前端未结

关注

 8  1351

春和景丽 2021-01-31 19:33

I\'m using the Stanford Named Entity Recognizer http://nlp.stanford.edu/software/CRF-NER.shtml and it\'s working fine. This is

    List&


      
      
        
          8条回答        

        
                    
            
            
                         
                
              
              
                
                   天涯浪人
                                             
                
                
                (楼主)
            
              
              
                2021-01-31 20:04
              

            
            
                        
Here is my full code, I use Stanford core NLP and write algorithm to concatenate Multi Term names. 

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import org.apache.log4j.Logger;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

/**
 * Created by Chanuka on 8/28/14 AD.
 */
public class FindNameEntityTypeExecutor {

private static Logger logger = Logger.getLogger(FindNameEntityTypeExecutor.class);

private StanfordCoreNLP pipeline;

public FindNameEntityTypeExecutor() {
    logger.info("Initializing Annotator pipeline ...");

    Properties props = new Properties();

    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");

    pipeline = new StanfordCoreNLP(props);

    logger.info("Annotator pipeline initialized");
}

List findNameEntityType(String text, String entity) {
    logger.info("Finding entity type matches in the " + text + " for entity type, " + entity);

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);
    List sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    List matches = new ArrayList();

    for (CoreMap sentence : sentences) {

        int previousCount = 0;
        int count = 0;
        // traversing the words in the current sentence
        // a CoreLabel is a CoreMap with additional token-specific methods

        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String word = token.get(CoreAnnotations.TextAnnotation.class);

            int previousWordIndex;
            if (entity.equals(token.get(CoreAnnotations.NamedEntityTagAnnotation.class))) {
                count++;
                if (previousCount != 0 && (previousCount + 1) == count) {
                    previousWordIndex = matches.size() - 1;
                    String previousWord = matches.get(previousWordIndex);
                    matches.remove(previousWordIndex);
                    previousWord = previousWord.concat(" " + word);
                    matches.add(previousWordIndex, previousWord);

                } else {
                    matches.add(word);
                }
                previousCount = count;
            }
            else
            {
                count=0;
                previousCount=0;
            }


        }

    }
    return matches;
}
}

    
             
                                                        
            
            
              
                
                0
              
                   
                
               讨论(0)
              
                                                  
              
              
                          
             
       
          
              
                                       
     查看其它8个回答


            
                         
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
                              			
        
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复