# Goal: convert spaCy's NER output to the following format:
#   ((line_start, token_start), (line_end, token_end)) - ENT
#
# Conventions used below.  The original note did not pin these down, so they
# are assumptions to confirm against whatever consumes this format:
#   * line numbers are 0-based and advance on every "\n" in the raw text;
#   * token indices are 0-based, restart at 0 on each line, and count spaCy
#     tokens exactly as spaCy produces them (including whitespace tokens);
#   * the end position is exclusive, mirroring spaCy's `Span.end`.

import spacy

nlp = spacy.load('en_core_web_sm')

# Raw sample text, kept separate from the parsed Doc.
text = (
    '\n'
    '"The world?" asked Gideon Spilett.\n'
    '\n'
    '"No, the island. Some stones for ballast, a mast, and a sail, which\n'
    'the captain will make for us some day, and we shall go splendidly!\n'
    'Well, captain--and you, Mr. Spilett; and you, Herbert; and you,\n'
)

# Entity types to keep.
# NOTE(review): 'VEH' does not look like a label emitted by en_core_web_sm;
# it is kept because the original listed it -- confirm whether it is needed.
labels = ['PERSON', 'FAC', 'VEH', 'ORG', 'GPE', 'LOC']

# The entities live on the parsed Doc, not on the raw string, so the text has
# to go through the pipeline before `.ents` can be read.
doc = nlp(text)

# token_position[i] == (line number, index within that line) for doc[i].
# Newlines are counted incrementally between consecutive token offsets so the
# whole pass stays linear in the length of the text.
token_position = []
line_no = 0
index_in_line = 0
cursor = 0
for token in doc:
    newlines_crossed = doc.text.count('\n', cursor, token.idx)
    if newlines_crossed:
        line_no += newlines_crossed
        index_in_line = 0
    cursor = token.idx
    token_position.append((line_no, index_in_line))
    index_in_line += 1

ann_format = []

for ent in doc.ents:
    if ent.label_ not in labels:
        continue

    line_start, token_start = token_position[ent.start]
    # `ent.end` is exclusive, so the last token of the entity is `ent.end - 1`;
    # adding 1 to its in-line index keeps the reported end exclusive as well.
    line_end, last_token = token_position[ent.end - 1]

    ann_format.append(
        f'(({line_start}, {token_start}), ({line_end}, {last_token + 1}))'
        f' - {ent.label_}'
    )
    print(f'{ent.text}, {ent.label_}')