forked from Show-Me-the-Code/python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path0006.py
More file actions
36 lines (32 loc) · 1.19 KB
/
0006.py
File metadata and controls
36 lines (32 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# -*- coding: utf-8 -*-
"""
**第 0006 题:**
你有一个目录,放了你一个月的日记,都是 txt,
为了避免分词的问题,假设内容都是英文,请统计出你认为每篇日记最重要的词。
"""
import os
import re
from collections import Counter
# Compiled once at import time. The original pattern '\b?(\w+)\b?' was not a
# raw string, so '\b' meant a literal backspace character rather than a word
# boundary; a plain r'\w+' extracts exactly the same runs of word characters.
_WORD_RE = re.compile(r'\w+')

# Very common words that must never be reported as the "most important" one.
_STOP_WORDS = frozenset(['a', 'the', 'to'])


def _most_common_word(text):
    """Return the most frequent non-stop word in *text* as ``(word, count)``.

    Words are compared case-insensitively (each match is lower-cased).
    Returns ``None`` when *text* contains no countable word.
    """
    lowered = (match.lower() for match in _WORD_RE.findall(text))
    counts = Counter(word for word in lowered if word not in _STOP_WORDS)
    top = counts.most_common(1)
    return top[0] if top else None


def findWord(DirPath):
    """Print the most frequent word of every ``.txt`` file in *DirPath*.

    For each regular file directly inside *DirPath* whose extension is
    exactly ``.txt``, one line of the form
    ``file: <name>->the most word: ('<word>', <count>)`` is printed.
    The words 'a', 'the' and 'to' are ignored. If a file holds no countable
    word, ``None`` is printed in place of the ``(word, count)`` pair.

    Nothing is printed, and ``None`` is returned, when *DirPath* is not an
    existing directory. The function always returns ``None``.
    """
    if not os.path.isdir(DirPath):
        return
    # Sort the listing so the output order does not depend on the filesystem.
    for name in sorted(os.listdir(DirPath)):
        filePath = os.path.join(DirPath, name)
        if not os.path.isfile(filePath):
            continue
        if os.path.splitext(filePath)[1] != '.txt':
            continue
        with open(filePath) as f:
            top = _most_common_word(f.read())
        # Index 0 of the ranking is the winner; the original printed index 1
        # (the runner-up) and crashed on files with fewer than two words.
        print('file: %s->the most word: %s' % (name, top))
# Script entry point: when executed directly (not imported), analyse the
# sample diary directory that ships alongside this exercise.
if __name__ == '__main__':
    findWord('source/0006')