Merged
Do the dot product on the sparse matrix

With a large corpus, converting the matrix to a dense array up front takes a lot of
memory. Computing the dot product on the sparse matrix and only then expanding the
(much smaller) result is more memory-efficient and gives the same values.
Martijn van Beers committed May 19, 2017
commit e0b091e32335eb24332fe6ad89d7ac4ae945308d
4 changes: 2 additions & 2 deletions Chapter-6/document_similarity.py
@@ -39,11 +39,11 @@
 def compute_cosine_similarity(doc_features, corpus_features,
                               top_n=3):
     # get document vectors
-    doc_features = doc_features.toarray()[0]
-    corpus_features = corpus_features.toarray()
+    doc_features = doc_features[0]
     # compute similarities
     similarity = np.dot(doc_features,
                         corpus_features.T)
+    similarity = similarity.toarray()[0]
     # get docs with highest similarity scores
     top_docs = similarity.argsort()[::-1][:top_n]
     top_docs_with_score = [(index, round(similarity[index], 3))
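The idea behind the patch can be sketched with a small, self-contained example. This is not the book's code: the tiny corpus matrix is made up for illustration, and it uses the `@` operator (equivalent to sparse matrix multiplication in SciPy) to make the sparse path explicit. It shows that multiplying while sparse and densifying only the 1-by-n result matches the dense-first computation, while never materializing the full dense corpus matrix.

```python
import numpy as np
from scipy.sparse import csr_matrix

# Hypothetical small corpus: 4 documents x 6 features (e.g. tf-idf weights).
corpus_features = csr_matrix(np.array([
    [0.0, 0.5, 0.0, 0.2, 0.0, 0.0],
    [0.1, 0.0, 0.0, 0.0, 0.3, 0.0],
    [0.0, 0.5, 0.1, 0.0, 0.0, 0.4],
    [0.2, 0.0, 0.0, 0.0, 0.0, 0.6],
]))
doc_features = corpus_features[0]  # sparse 1 x 6 row for the query document

# Dense-first (the old approach): expands the whole corpus matrix to a
# dense array before multiplying.
dense_sim = np.dot(doc_features.toarray()[0], corpus_features.toarray().T)

# Sparse-first (the patched approach): multiply while still sparse, then
# expand only the small 1 x n_docs result.
sparse_sim = (doc_features @ corpus_features.T).toarray()[0]

# Both paths produce the same similarity scores.
assert np.allclose(dense_sim, sparse_sim)
```

For a real tf-idf matrix with tens of thousands of documents and features, the dense-first path allocates the entire n_docs x n_features array, while the sparse-first path only ever densifies a single row of scores.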