Skip to content

Commit bb2569e

Browse files
Add removeDocument to TFIDF (#749)
* Add removeDocument to TFIDF * Bug * Typo
1 parent ed002eb commit bb2569e

File tree

2 files changed

+33
-0
lines changed

2 files changed

+33
-0
lines changed

lib/natural/tfidf/tfidf.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,25 @@ class TfIdf {
132132
}
133133
}
134134

135+
// Remove a document from the corpus
136+
// Returns true if the document was found
137+
// Returns false if the document was not found
138+
removeDocument (key) {
139+
// Find the document
140+
const index = this.documents.findIndex(function (document) {
141+
return document.__key === key
142+
})
143+
// If found, remove it
144+
if (index > -1) {
145+
this.documents.splice(index, 1)
146+
// Invalidate the cache
147+
this._idfCache = Object.create(null)
148+
return true
149+
}
150+
151+
return false
152+
}
153+
135154
// If restoreCache is set to true, all terms idf scores currently cached will be recomputed.
136155
// Otherwise, the cache will just be wiped clean
137156
addFileSync (path, encoding, key, restoreCache) {

spec/tfidf_spec.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,4 +283,18 @@ describe('tfidf', function () {
283283
expect(tfidf.setStopwords(stopwords)).toEqual(false)
284284
})
285285
})
286+
287+
describe('Remove documents', function () {
  it('should remove a document', function () {
    // Start from a fresh corpus holding two documents keyed 0 and 1
    tfidf = new TfIdf()
    tfidf.addDocument('this document is about node.', 0)
    tfidf.addDocument('this document isn\'t about node.', 1)

    // The first removal of key 0 finds the document and reports success
    expect(tfidf.removeDocument(0)).toEqual(true)
    // A second removal of the same key finds nothing and reports failure
    expect(tfidf.removeDocument(0)).toEqual(false)
  })
})
286300
})

0 commit comments

Comments
 (0)