Skip to content

Commit cfa2dc2

Browse files
committed
ndcg
1 parent a3cab7b commit cfa2dc2

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

docsim/ndcg.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# reference: https://en.wikipedia.org/wiki/Discounted_cumulative_gain
2+
import sys
3+
import math
4+
5+
6+
def nDCG(scores, k=None):
    """Return the normalized discounted cumulative gain of a ranked list.

    Uses the exponential-gain formulation: each result at zero-based rank
    ``i`` with relevance ``rel`` contributes ``(2**rel - 1) / log2(i + 2)``.
    The DCG of ``scores`` in the given order is divided by the DCG of the
    same scores sorted from most to least relevant (the ideal ranking).

    Args:
        scores: Relevance grades in ranked order (best-ranked result first).
        k: Optional positive cutoff. When given, only the first ``k``
            results of both the actual and the ideal ranking are counted,
            so the ideal ranking is still drawn from *all* supplied scores.
            When ``None`` (the default) every score is used.

    Returns:
        The nDCG as a float. Returns 0.0 when the ideal DCG is zero (empty
        input or no relevant result), where the ratio is otherwise undefined.
    """
    def dcg(ranked):
        # rank is zero-based, hence log2(rank + 2) for the position discount.
        return sum((2 ** rel - 1) / math.log(rank + 2, 2)
                   for rank, rel in enumerate(ranked))

    actual = list(scores)
    ideal = sorted(actual, reverse=True)
    if k is not None:
        actual = actual[:k]
        ideal = ideal[:k]

    ideal_dcg = dcg(ideal)
    if ideal_dcg == 0:
        # Nothing relevant to retrieve: avoid dividing by zero.
        return 0.0
    return dcg(actual) / ideal_dcg
16+
17+
18+
19+
20+
def _parse_line(line):
    """Return ``(query, score)`` parsed from one input line, or None if malformed.

    A valid line has 4 tab-separated fields (query, sku, score, pos) or 5
    (the same plus one trailing field that is ignored), and an integer score.
    """
    fields = line.strip().split('\t')
    if len(fields) not in (4, 5):
        return None
    try:
        return fields[0], int(fields[2])
    except ValueError:
        return None


def _ndcg_per_query(lines, k):
    """Return ``[(query, nDCG)]`` for each run of consecutive lines sharing a query.

    Only the first ``k`` scores of each run are evaluated. Runs whose first
    ``k`` scores are all zero are skipped, since their nDCG is undefined.
    Malformed lines are echoed to stdout and otherwise ignored.
    """
    results = []

    def close_group(query, group_scores):
        top = group_scores[:k]
        # Guard here so an all-zero (or empty) group never reaches the
        # division inside nDCG.
        if any(top):
            results.append((query, nDCG(top)))

    # None never equals a parsed query string, so the first valid line
    # always opens a new group.
    current_query, scores = None, []
    for line in lines:
        parsed = _parse_line(line)
        if parsed is None:
            print(line.strip())
            continue
        query, score = parsed
        if query != current_query:
            close_group(current_query, scores)
            current_query, scores = query, []
        scores.append(score)
    # The last group has no following line to trigger its evaluation.
    close_group(current_query, scores)
    return results


if __name__ == "__main__":
    # Usage: <script> finput k
    #   finput: file with one result per line, in ranked order per query:
    #           query \t sku \t score \t pos
    #   k:      rank cutoff (the value "p" on the Wikipedia DCG page)
    if len(sys.argv) < 3:
        sys.exit("usage: {} <finput> <k>".format(sys.argv[0]))
    finput = sys.argv[1]
    k = int(sys.argv[2])

    with open(finput, 'r') as fin:
        out = _ndcg_per_query(fin, k)

    for query, value in out:
        print("nDCG of {} is {}".format(query, value))
    if out:
        print("Average is {}".format(sum(value for _, value in out) / len(out)))

0 commit comments

Comments
 (0)