I am trying to extract noun phrases from sentences using Stanza (with Stanford CoreNLP). This can only be done with the CoreNLPClient class from Stanza's stanza.server module.
# Import the CoreNLP client
from stanza.server import CoreNLPClient
# Get noun phrases with tregex
def noun_phrases(_client, _text, _annotators=None, _pattern='NP'):
    """Print and return the spans of ``_text`` that match a Tregex pattern.

    Args:
        _client: Object exposing ``tregex(text, pattern, annotators=...)``,
            such as the ``CoreNLPClient`` instances used in this file.
        _text: Raw text handed to ``_client.tregex``.
        _annotators: Annotator list forwarded unchanged to
            ``_client.tregex``; defaults to ``None``.
        _pattern: Tregex pattern to search for; defaults to ``'NP'`` so
            existing callers keep getting noun phrases.

    Returns:
        A list with the ``spanString`` of every match, sentence by sentence,
        in the order the matches appear in the response.
    """
    matches = _client.tregex(_text, _pattern, annotators=_annotators)
    phrases = [
        match['spanString']
        for sentence in matches['sentences']
        for match in sentence.values()
    ]
    # One tab-prefixed phrase per line; with no matches this still prints a
    # single empty line, exactly as the print-only version did.
    print("\n".join("\t" + phrase for phrase in phrases))
    return phrases
# English example: client started without a `properties` argument.
with CoreNLPClient(timeout=30000, memory='16G') as client:
    englishText = "Albert Einstein was a German-born theoretical physicist. He developed the theory of relativity."
    # Separator line first, then the input text, then its noun phrases.
    print('---', englishText, sep='\n')
    noun_phrases(
        client,
        englishText,
        _annotators="tokenize,ssplit,pos,lemma,parse",
    )
# French example: client started with properties='french'; the annotator
# list additionally includes `mwt`.
with CoreNLPClient(properties='french', timeout=30000, memory='16G') as client:
    frenchText = "Je suis John."
    # Separator line first, then the input text, then its noun phrases.
    print('---', frenchText, sep='\n')
    noun_phrases(
        client,
        frenchText,
        _annotators="tokenize,ssplit,mwt,pos,lemma,parse",
    )