Skip to content

Commit 20065c3

Browse files
committed
feat(corefs): adding corefs annotation support
1 parent 675dc70 commit 20065c3

18 files changed

+1881
-9
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ CoreNLP
198198
ParserAnnotator # https://stanfordnlp.github.io/CoreNLP/parse.html
199199
DependencyParseAnnotator # https://stanfordnlp.github.io/CoreNLP/depparse.html
200200
RelationExtractorAnnotator # https://stanfordnlp.github.io/CoreNLP/relation.html
201-
DeterministicCorefAnnotator # https://stanfordnlp.github.io/CoreNLP/coref.html
201+
CorefAnnotator # https://stanfordnlp.github.io/CoreNLP/coref.html
202202
SentimentAnnotator # https://stanfordnlp.github.io/CoreNLP/sentiment.html - TODO
203203
RelationExtractorAnnotator # https://stanfordnlp.github.io/CoreNLP/relation.html - TODO
204204
NaturalLogicAnnotator # https://stanfordnlp.github.io/CoreNLP/natlog.html - TODO

examples/corefs.js

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// NOTE: run with babel-node
2+
import path from 'path';
3+
import CoreNLP, { Properties, Pipeline, ConnectorServer } from '../src';
4+
// Example: run the coref annotator over a short text and print, for every
// coreference chain, its representative mention followed by the mentions
// that refer back to it.
const props = new Properties();
props.setProperty('annotators', 'tokenize,ssplit,coref');
const doc = new CoreNLP.simple.Document(`
George is a good person. He is really smart. I heard he is also a great son.
Lisa is instead a sad girl, I never saw her happy.
`);
const pipeline = new Pipeline(props, 'English');

pipeline.annotate(doc)
  // The resolved value is named differently from the outer `doc` to avoid shadowing it.
  .then((annotatedDoc) => {
    const corefChains = annotatedDoc.corefs();
    corefChains.forEach((chain) => {
      console.log('representative ->', chain.representative().token().word());
      chain.nonRepresentatives().forEach((mention) => {
        console.log(' ref ->', mention.token().word(), mention.gender(), mention.number());
      });
    });
  })
  .catch((err) => {
    // Failures go to stderr so they do not mix with the expected output below.
    console.error('err', err);
  });

/*
OUTPUT:
representative -> George
 ref -> He MALE SINGULAR
 ref -> he MALE SINGULAR
representative -> Lisa
 ref -> her FEMALE SINGULAR
representative -> I
 ref -> I UNKNOWN SINGULAR
*/

src/index.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import ParserAnnotator from './simple/annotator/parse';
1313
import DependencyParseAnnotator from './simple/annotator/depparse';
1414
import RelationExtractorAnnotator from './simple/annotator/relation';
1515
import RegexNERAnnotator from './simple/annotator/regexner';
16+
import CorefAnnotator from './simple/annotator/coref';
1617
import Tree from './util/tree';
1718
import _Properties from './properties';
1819
import _Pipeline from './pipeline';
@@ -56,6 +57,7 @@ export default {
5657
DependencyParseAnnotator,
5758
RelationExtractorAnnotator,
5859
RegexNERAnnotator,
60+
CorefAnnotator,
5961
},
6062
},
6163
/**

src/index.spec.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import ParserAnnotator from './simple/annotator/parse';
1818
import DependencyParseAnnotator from './simple/annotator/depparse';
1919
import RelationExtractorAnnotator from './simple/annotator/relation';
2020
import RegexNERAnnotator from './simple/annotator/regexner';
21+
import CorefAnnotator from './simple/annotator/coref';
2122
import Tree from './util/tree';
2223

2324
describe('CoreNLP Library entry point', () => {
@@ -71,6 +72,7 @@ describe('CoreNLP Library entry point', () => {
7172
DependencyParseAnnotator,
7273
RelationExtractorAnnotator,
7374
RegexNERAnnotator,
75+
CorefAnnotator,
7476
});
7577
});
7678
});

src/pipeline.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import parse from './simple/annotator/parse';
1010
import depparse from './simple/annotator/depparse';
1111
import relation from './simple/annotator/relation';
1212
import regexner from './simple/annotator/regexner';
13+
import coref from './simple/annotator/coref';
1314
import Document from './simple/document';
1415

1516
import {
@@ -28,6 +29,7 @@ const ANNOTATORS_BY_KEY = {
2829
depparse,
2930
relation,
3031
regexner,
32+
coref,
3133
};
3234

3335
const LANGUAGE_TO_ISO2 = {

src/simple/annotator/coref.js

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import Annotator from '../annotator';
2+
import TokenizerAnnotator from './tokenize';
3+
import WordsToSentenceAnnotator from './ssplit';
4+
/**
 * @class
 * @classdesc Class representing a CorefAnnotator.
 * @extends Annotator
 * @memberof CoreNLP/simple/annotator
 * @requires tokenize, ssplit, coref
 * @see {@link https://stanfordnlp.github.io/CoreNLP/coref.html|CorefAnnotator}
 */
class CorefAnnotator extends Annotator {
  /**
   * Create an Annotator
   * @param {Object} [options] a key-value map of options, without the annotator prefix
   */
  constructor(options = {}) {
    super(
      'coref',
      // Shallow copy, so the object handed to the base class is not the caller's own.
      {
        ...options,
      },
      // Annotators handed to the base class alongside 'coref'
      // (presumably its prerequisites; confirm against Annotator).
      [
        new TokenizerAnnotator(),
        new WordsToSentenceAnnotator(),
      ],
    );
  }
}
31+
32+
export default CorefAnnotator;

src/simple/annotator/coref.spec.js

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import CorefAnnotator from './coref';
2+
// Spec for CorefAnnotator: checks the annotator pipeline it reports and its default options.
describe('Annotator', () => {
  describe('CorefAnnotator', () => {
    let annotator;

    beforeEach(() => {
      // A fresh instance per test, built without any custom options.
      annotator = new CorefAnnotator();
    });

    it('should have a proper pipeline', () => {
      expect(annotator.pipeline()).to.deep.equal(['tokenize', 'ssplit', 'coref']);
    });

    it('should have the proper default options', () => {
      expect(annotator.options()).to.deep.equal({});
    });
  });
});

src/simple/coref-chain.js

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import CorefMention from './coref-mention';
2+
/**
 * @class
 * @classdesc Class representing a CorefChain: an ordered list of CorefMention
 * instances, optionally linked to the Document they were extracted from.
 */
class CorefChain {
  /**
   * Create a CorefChain
   * @param {Array.<CorefMention>} [mentions] - defaults to an empty list, so that a chain
   *   built without arguments (as the static fromJSON does) is still safe to query
   */
  constructor(mentions = []) {
    this._mentions = mentions;
  }

  /**
   * Retrieves all the contained CorefMention instances
   * @returns {Array.<CorefMention>} mentions
   */
  mentions() {
    return this._mentions;
  }

  /**
   * Retrieves a CorefMention at the index specified
   * @param {number} index
   * @returns {CorefMention} mention - undefined when the index is out of range
   */
  mention(index) {
    return this._mentions[index];
  }

  /**
   * Retrieves the first representative mention
   * @returns {CorefMention} mention - undefined when no mention is representative
   */
  representative() {
    return this._mentions.find(mention => mention.isRepresentativeMention());
  }

  /**
   * Retrieves all the non-representative mentions
   * @returns {Array.<CorefMention>} mentions
   */
  nonRepresentatives() {
    return this._mentions.filter(mention => !mention.isRepresentativeMention());
  }

  /**
   * Gets or sets a Document reference for the current coref-chain
   * @param {Document} [doc] - when truthy, it is stored as the chain's document
   * @returns {Document} doc - the stored document (undefined if none was ever set)
   */
  document(doc = null) {
    if (doc) {
      this._document = doc;
    }

    return this._document;
  }

  /**
   * Update an instance of CorefChain with Document references to Sentence(s) and their Token(s)
   * @param {Document} doc - a Document object, the same one used to generate corefs annotations
   * @returns {CorefChain} chain - The current chain instance
   */
  fromDocument(doc) {
    this._mentions.forEach((mention) => {
      // sentNum() and startIndex() are shifted by one before being used as array-style indexes.
      const sentence = doc.sentence(mention.sentNum() - 1);
      const token = sentence.token(mention.startIndex() - 1);
      mention.sentence(sentence);
      mention.token(token);
    });
    return this;
  }

  /**
   * Update an instance of CorefChain with data provided by a JSON
   * @param {Array.<CorefMentionJSON>} data - A sentence corefs mentions chain, as
   *   returned by CoreNLP API service
   * @returns {CorefChain} chain - The current chain instance
   */
  fromJSON(data) {
    this._mentions = data.map(mention => CorefMention.fromJSON(mention));
    return this;
  }

  /**
   * Serialization hook: exposes the chain as a plain array of its mentions
   * @returns {Array.<CorefMention>} mentions - a shallow copy of the internal list
   */
  toJSON() {
    return [...this._mentions];
  }

  /**
   * Get an instance of CorefChain from a given JSON of sentence corefs
   * @param {Array.<CorefMentionJSON>} data - The sentence corefs data, as
   *   returned by CoreNLP API service
   * @returns {CorefChain} chain - A new CorefChain instance
   */
  static fromJSON(data) {
    // `new this()` keeps the factory usable from subclasses.
    const instance = new this();
    return instance.fromJSON(data);
  }
}
103+
104+
export default CorefChain;

src/simple/coref-chain.spec.js

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import CorefChain from './coref-chain';
2+
// Spec for CorefChain: checks that an instance exposes the expected public methods.
describe('CorefChain', () => {
  let chain;

  beforeEach(() => {
    // Built without mentions; only the shape of the interface is checked here.
    chain = new CorefChain();
  });

  context('CorefChain interface', () => {
    it('should follow the CorefChain contract', () => {
      expect(chain).to.have.property('mentions').that.is.a('function');
      expect(chain).to.have.property('mention').that.is.a('function');
      expect(chain).to.have.property('representative').that.is.a('function');
      expect(chain).to.have.property('nonRepresentatives').that.is.a('function');
      expect(chain).to.have.property('document').that.is.a('function');
      expect(chain).to.have.property('fromDocument').that.is.a('function');
      expect(chain).to.have.property('fromJSON').that.is.a('function');
    });

    // Placeholder suite: constructor behaviour is not covered yet.
    describe.skip('constructor', () => {
    });
  });
});

0 commit comments

Comments
 (0)