Skip to content

Commit abb4711

Browse files
committed
modified URLChecker
1 parent 3dfd5aa commit abb4711

File tree

4 files changed

+36
-14
lines changed

4 files changed

+36
-14
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
Utils/URLChecker/out
22
Utils/URLChecker/*.class
3+
Utils/.idea
4+
Utils/Utils.iml

Utils/URLChecker/BadUrlsSaver.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,22 @@ public class BadUrlsSaver {
1818
*/
1919
public static void save(Map<String, String> map, boolean isSaveAlLMatterUrls) throws IOException {
2020
File file = new File("..\\..\\badUrls.txt");
21-
if (!file.exists()) {
22-
file.createNewFile();
21+
if (file.exists()) {
22+
System.out.println("删除先前文件: " + (file.delete() ? "成功" : "失败"));
2323
}
2424
FileWriter writer = new FileWriter(file);
2525
BufferedWriter bufferedWriter = new BufferedWriter(writer);
26+
int index = 0;
2627
for (Map.Entry<String, String> entry : map.entrySet()) {
2728
if (isSaveAlLMatterUrls) {
28-
bufferedWriter.write(entry.getValue() + "\t\t" + entry.getKey() + "\r\n");
29+
bufferedWriter.write((index++) + ". " + "[" + entry.getValue() + "]" + "(" + entry.getKey() + ")" + "\r\n");
2930
} else {
3031
if (entry.getValue().equals("TIMEOUT")) {
31-
bufferedWriter.write(entry.getValue() + "\t\t" + entry.getKey() + "\r\n");
32+
bufferedWriter.write((index++) + ". " + "[" + entry.getValue() + "]" + "(" + entry.getKey() + ")" + "\r\n");
3233
}
3334
}
3435
}
3536
bufferedWriter.close();
3637
}
38+
3739
}

Utils/URLChecker/NetUtils.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import java.io.InputStreamReader;
55
import java.net.HttpURLConnection;
66
import java.net.MalformedURLException;
7+
import java.net.ProtocolException;
78
import java.net.URL;
89
import java.util.HashMap;
910
import java.util.List;
@@ -27,11 +28,16 @@ public static String getBundle(String urlMain) {
2728
try {
2829
URL url = new URL(urlMain);
2930
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
31+
connection.setRequestProperty("Content-Type", "text/html");
32+
connection.setRequestProperty("Accept-Charset", "utf-8");
33+
connection.setRequestProperty("Content-Type", "utf-8");
34+
connection.setRequestProperty("Charset", "utf-8");
35+
connection.setRequestMethod("GET");
3036
InputStream inputStream;
3137
BufferedReader bufferedReader = null;
3238
if (connection.getResponseCode() == HttpURLConnection.HTTP_OK) {
3339
inputStream = connection.getInputStream();
34-
bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
40+
bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
3541
String line = null;
3642
while ((line = bufferedReader.readLine()) != null) {
3743
stringBuilder.append(line);
@@ -58,7 +64,7 @@ public static Map<String, String> getNotAvailableUrl(String urlMain) {
5864
threadPool.execute(new ItemThread(url, new ICallback() {
5965
@Override
6066
public void finish(int code) {
61-
matterUrls.put(url, "BAD");
67+
matterUrls.put(url, "BAD" + "[Code: " + code + "]");
6268
}
6369

6470
@Override
@@ -75,6 +81,7 @@ public void timeout() {
7581
}
7682
}
7783

84+
7885
}
7986

8087
class ItemThread implements Runnable {
@@ -89,25 +96,32 @@ public ItemThread(String url, ICallback callback) {
8996

9097
@Override
9198
public void run() {
99+
HttpURLConnection connection = null;
92100
try {
93101
URL tmp = new URL(url);
94-
HttpURLConnection connection = (HttpURLConnection) tmp.openConnection();
102+
connection = (HttpURLConnection) tmp.openConnection();
95103
connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36");
96104
connection.setRequestMethod("GET");
97-
connection.setConnectTimeout(1000);
105+
connection.setReadTimeout(10000);
106+
connection.setConnectTimeout(10000);
98107
int responseCode = connection.getResponseCode();
99108
if (responseCode == HttpURLConnection.HTTP_OK) {
100109
System.out.println("checking [" + url + "]" + "----->[OK]");
101110
} else {
102111
System.out.println("checking [" + url + "]" + "----->[BAD]" + "; Code--->" + responseCode);
103112
callback.finish(responseCode);
104113
}
105-
connection.disconnect();
106114
} catch (MalformedURLException e) {
107115
e.printStackTrace();
116+
} catch (ProtocolException e) {
117+
e.printStackTrace();
108118
} catch (IOException e) {
109-
System.out.println("checking [" + url + "]" + "[Time Out]");
119+
System.out.println("checking [" + url + "]" + e.getMessage());
110120
callback.timeout();
121+
} finally {
122+
if (connection != null) {
123+
connection.disconnect();
124+
}
111125
}
112126
}
113127
}

Utils/URLChecker/UrlMatcher.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,19 @@
88
*/
99
public class UrlMatcher {
1010
public static List<String> getUrlStr(String response) {
11-
Pattern pattern = Pattern.compile("(http|ftp|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+)+([\\w\\-\\.,@?^=%&amp;:/~\\+#]*[\\w\\-\\@?^=%&amp;/~\\+#])?");
11+
Pattern pattern = Pattern.compile("[a-zA-Z]+://[^\\s]*", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
1212
Matcher matcher = pattern.matcher(response);
13-
List<String> urlList = new ArrayList<String>();
13+
List<String> urls = new ArrayList<String>();
1414
while (matcher.find()) {
15-
urlList.add(matcher.group());
15+
String tmp = matcher.group();
16+
int index = tmp.lastIndexOf(")");
17+
tmp = tmp.substring(0, index);
18+
urls.add(tmp);
1619
}
17-
return urlList;
20+
return urls;
1821
}
1922

23+
2024
public static String getUrlTitle(String urlResponse) {
2125
Pattern pattern = Pattern.compile("<title>.*?</title>");
2226
Matcher matcher = pattern.matcher(urlResponse);

0 commit comments

Comments
 (0)