I\'m using the Stanford Named Entity Recognizer http://nlp.stanford.edu/software/CRF-NER.shtml and it\'s working fine. This is
List&
Here is my full code, I use Stanford core NLP and write algorithm to concatenate Multi Term names.
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import org.apache.log4j.Logger;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
/**
* Created by Chanuka on 8/28/14 AD.
*/
public class FindNameEntityTypeExecutor {
private static Logger logger = Logger.getLogger(FindNameEntityTypeExecutor.class);
private StanfordCoreNLP pipeline;
public FindNameEntityTypeExecutor() {
logger.info("Initializing Annotator pipeline ...");
Properties props = new Properties();
props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
pipeline = new StanfordCoreNLP(props);
logger.info("Annotator pipeline initialized");
}
List findNameEntityType(String text, String entity) {
logger.info("Finding entity type matches in the " + text + " for entity type, " + entity);
// create an empty Annotation just with the given text
Annotation document = new Annotation(text);
// run all Annotators on this text
pipeline.annotate(document);
List sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
List matches = new ArrayList();
for (CoreMap sentence : sentences) {
int previousCount = 0;
int count = 0;
// traversing the words in the current sentence
// a CoreLabel is a CoreMap with additional token-specific methods
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
String word = token.get(CoreAnnotations.TextAnnotation.class);
int previousWordIndex;
if (entity.equals(token.get(CoreAnnotations.NamedEntityTagAnnotation.class))) {
count++;
if (previousCount != 0 && (previousCount + 1) == count) {
previousWordIndex = matches.size() - 1;
String previousWord = matches.get(previousWordIndex);
matches.remove(previousWordIndex);
previousWord = previousWord.concat(" " + word);
matches.add(previousWordIndex, previousWord);
} else {
matches.add(word);
}
previousCount = count;
}
else
{
count=0;
previousCount=0;
}
}
}
return matches;
}
}