Skip to content

Commit ce1c597

Browse files
committed
feat: verify URL content, use Baidu for alive test
1 parent be7060a commit ce1c597

File tree

3 files changed

+51
-21
lines changed

3 files changed

+51
-21
lines changed

README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,12 @@
1010
## 帮助
1111

1212
```shell
13-
> .\rotateproxy.exe -h
13+
.\rotateproxy.exe -h
1414
Usage of rotateproxy.exe:
15+
-check string
16+
check url (default "https://www.google.com")
17+
-checkWords string
18+
words in check url (default "Copyright The Closure Library Authors")
1519
-email string
1620
email address
1721
-l string
@@ -20,10 +24,14 @@ Usage of rotateproxy.exe:
2024
the page count you want to crawl (default 5)
2125
-pass string
2226
authentication password
27+
-proxy string
28+
proxy
2329
-region int
2430
0: all 1: cannot bypass gfw 2: bypass gfw
2531
-rule string
26-
search rule (default "protocol==\"socks5\" && \"Version:5 Method:No Authentication(0x00)\" && after=\"2021-08-01\" && country=\"CN\"")
32+
search rule (default "protocol==\"socks5\" && \"Version:5 Method:No Authentication(0x00)\" && after=\"2022-02-01\" && country=\"CN\"")
33+
-strategy int
34+
0: random, 1: Select the one with the shortest timeout, 2: Select the two with the shortest timeout, ... (default 3)
2735
-token string
2836
token
2937
-user string

check.go

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package rotateproxy
33
import (
44
"crypto/tls"
55
"fmt"
6-
"io/ioutil"
6+
"io"
77
"net/http"
88
"net/url"
99
"strings"
@@ -38,23 +38,26 @@ func CheckProxyAlive(proxyURL string) (respBody string, timeout int64, avail boo
3838
Timeout: 20 * time.Second,
3939
}
4040
startTime := time.Now()
41-
resp, err := httpclient.Get("http://cip.cc/")
41+
42+
// http://cip.cc isn't stable enough for proxies alive test.
43+
resp, err := httpclient.Get("https://www.baidu.com/robots.txt")
44+
4245
if err != nil {
4346
return "", 0, false
4447
}
4548
defer resp.Body.Close()
4649
timeout = int64(time.Since(startTime))
47-
body, err := ioutil.ReadAll(resp.Body)
50+
body, err := io.ReadAll(resp.Body)
4851
if err != nil {
4952
return "", 0, false
5053
}
51-
if !strings.Contains(string(body), "地址") {
54+
if !strings.Contains(string(body), "Baiduspider-image") {
5255
return "", 0, false
5356
}
5457
return string(body), timeout, true
5558
}
5659

57-
func CheckProxyWithCheckURL(proxyURL string, checkURL string) (timeout int64, avail bool) {
60+
func CheckProxyWithCheckURL(proxyURL string, checkURL string, checkURLwords string) (timeout int64, avail bool) {
5861
fmt.Printf("check %s: %s\n", proxyURL, checkURL)
5962
proxy, _ := url.Parse(proxyURL)
6063
httpclient := &http.Client{
@@ -72,32 +75,41 @@ func CheckProxyWithCheckURL(proxyURL string, checkURL string) (timeout int64, av
7275
}
7376
defer resp.Body.Close()
7477
timeout = int64(time.Since(startTime))
78+
body, err := io.ReadAll(resp.Body)
79+
80+
if err != nil {
81+
return 0, false
82+
}
7583

7684
// TODO: support regex
7785
if resp.StatusCode != 200 {
7886
return 0, false
7987
}
8088

89+
if !strings.Contains(string(body), checkURLwords) {
90+
return 0, false
91+
}
92+
8193
return timeout, true
8294
}
8395

84-
func StartCheckProxyAlive(checkURL string) {
96+
func StartCheckProxyAlive(checkURL string, checkURLwords string) {
8597
go func() {
8698
ticker := time.NewTicker(120 * time.Second)
8799
for {
88100
select {
89101
case <-crawlDone:
90102
fmt.Println("Checking")
91-
checkAlive(checkURL)
103+
checkAlive(checkURL, checkURLwords)
92104
fmt.Println("Check done")
93105
case <-ticker.C:
94-
checkAlive(checkURL)
106+
checkAlive(checkURL, checkURLwords)
95107
}
96108
}
97109
}()
98110
}
99111

100-
func checkAlive(checkURL string) {
112+
func checkAlive(checkURL string, checkURLwords string) {
101113
proxies, err := QueryProxyURL()
102114
if err != nil {
103115
fmt.Printf("[!] query db error: %v\n", err)
@@ -108,7 +120,7 @@ func checkAlive(checkURL string) {
108120
respBody, timeout, avail := CheckProxyAlive(proxy.URL)
109121
if avail {
110122
if checkURL != "" {
111-
timeout, avail = CheckProxyWithCheckURL(proxy.URL, checkURL)
123+
timeout, avail = CheckProxyWithCheckURL(proxy.URL, checkURL, checkURLwords)
112124
}
113125
if avail {
114126
fmt.Printf("%v 可用\n", proxy.URL)

cmd/rotateproxy/main.go

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,24 @@ package main
22

33
import (
44
"flag"
5+
"fmt"
6+
"os"
57
"regexp"
68
"strings"
79

810
"github.com/akkuman/rotateproxy"
911
)
1012

1113
var (
12-
baseCfg rotateproxy.BaseConfig
13-
email string
14-
token string
15-
rule string
16-
pageCount int
17-
proxy string
18-
checkURL string
19-
portPattern = regexp.MustCompile(`^\d+$`)
14+
baseCfg rotateproxy.BaseConfig
15+
email string
16+
token string
17+
rule string
18+
pageCount int
19+
proxy string
20+
checkURL string
21+
checkURLwords string
22+
portPattern = regexp.MustCompile(`^\d+$`)
2023
)
2124

2225
func init() {
@@ -28,10 +31,17 @@ func init() {
2831
flag.StringVar(&proxy, "proxy", "", "proxy")
2932
flag.StringVar(&rule, "rule", `protocol=="socks5" && "Version:5 Method:No Authentication(0x00)" && after="2022-02-01" && country="CN"`, "search rule")
3033
flag.StringVar(&checkURL, "check", `https://www.google.com`, "check url")
34+
flag.StringVar(&checkURLwords, "checkWords", `Copyright The Closure Library Authors`, "words in check url")
3135
flag.IntVar(&baseCfg.IPRegionFlag, "region", 0, "0: all 1: cannot bypass gfw 2: bypass gfw")
3236
flag.IntVar(&baseCfg.SelectStrategy, "strategy", 3, "0: random, 1: Select the one with the shortest timeout, 2: Select the two with the shortest timeout, ...")
3337
flag.IntVar(&pageCount, "page", 5, "the page count you want to crawl")
3438
flag.Parse()
39+
40+
if checkURL != "https://www.google.com" && checkURLwords == "Copyright The Closure Library Authors" {
41+
fmt.Println("You set check url but forget to set `-checkWords`!")
42+
os.Exit(1)
43+
}
44+
3545
}
3646

3747
func isFlagPassed(name string) bool {
@@ -57,7 +67,7 @@ func main() {
5767
}
5868

5969
rotateproxy.StartRunCrawler(token, email, rule, pageCount, proxy)
60-
rotateproxy.StartCheckProxyAlive(checkURL)
70+
rotateproxy.StartCheckProxyAlive(checkURL, checkURLwords)
6171
c := rotateproxy.NewRedirectClient(rotateproxy.WithConfig(&baseCfg))
6272
c.Serve()
6373
select {}

0 commit comments

Comments
 (0)