diff --git a/Chapter-6/document_similarity.py b/Chapter-6/document_similarity.py index 2e7510e..a20fc73 100644 --- a/Chapter-6/document_similarity.py +++ b/Chapter-6/document_similarity.py @@ -39,11 +39,11 @@ def compute_cosine_similarity(doc_features, corpus_features, top_n=3): # get document vectors - doc_features = doc_features.toarray()[0] - corpus_features = corpus_features.toarray() + doc_features = doc_features[0] # compute similarities similarity = np.dot(doc_features, corpus_features.T) + similarity = similarity.toarray()[0] # get docs with highest similarity scores top_docs = similarity.argsort()[::-1][:top_n] top_docs_with_score = [(index, round(similarity[index], 3))