We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ee15ab8, commit 90114e2 (copy full SHA for 90114e2)
exercise0009/html_parse.py
@@ -0,0 +1,19 @@
1
+# coding: utf-8
2
+# author: jessun
3
+# date: 2017/6/15 16:49
4
+# **第 0009 题:**一个HTML文件,找出里面的**链接**。
5
+import re
6
+
7
8
def html_parse(input_file):
    """Print and return the absolute HTTP(S) links found in an HTML file.

    The file is read as UTF-8 and parsed with the standard-library HTML
    parser, so an anchor is recognised regardless of attribute order,
    quoting style, or tag/attribute letter case.

    Args:
        input_file: Path of the HTML file to scan.

    Returns:
        A list of the ``href`` values of ``<a>`` tags that start with
        ``"http"``, in document order. Each link is also printed on its
        own line.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Local import keeps this function self-contained; the module only
    # imports ``re`` at the top level.
    from html.parser import HTMLParser

    class _AnchorHrefCollector(HTMLParser):
        """Collect the href attribute of every <a> start tag."""

        def __init__(self):
            super().__init__()
            self.hrefs = []

        def handle_starttag(self, tag, attrs):
            # HTMLParser lower-cases tag and attribute names for us.
            if tag != "a":
                return
            # Use the first href attribute, as browsers do; a valueless
            # attribute (``<a href>``) is reported as None.
            href = next((value for name, value in attrs if name == "href"), None)
            if href:
                self.hrefs.append(href.strip())

    collector = _AnchorHrefCollector()
    with open(input_file, 'rt', encoding="utf-8") as f:
        collector.feed(f.read())
    collector.close()

    # Keep only absolute http:// and https:// links; relative paths,
    # fragments, mailto: and similar schemes are dropped.
    all_links = [link for link in collector.hrefs if link.startswith("http")]
    for link in all_links:
        print(link)
    return all_links
16
17
18
if __name__ == '__main__':
    import sys

    # Scan the file named on the command line; with no argument, fall back
    # to 'index.html' in the current directory as before.
    html_parse(sys.argv[1] if len(sys.argv) > 1 else 'index.html')
0 commit comments