Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 28 additions & 15 deletions lib/faexport/scraper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,11 @@ def user(name)

def budlist(name, page, is_watchers)
mode = is_watchers ? "to" : "by"
url = "watchlist/#{mode}/#{escape(name)}/#{page}/"
if page == 1
url = "watchlist/#{mode}/#{escape(name)}/"
else
url = "watchlist/#{mode}/#{escape(name)}/#{page}/"
end
html = fetch(url)

html.at_css("td.alt1").css(".c-usernameBlockSimple__displayName").map(&:content)
Expand Down Expand Up @@ -717,14 +721,12 @@ def search(options = {})
# Handle page specification
page = options["page"]
if page !~ /[0-9]+/ || page.to_i <= 1
options["page"] = 1
params["do_search"] = "Search"
params["page"] = 1
else
options["page"] = options["page"].to_i - 1
params["next_page"] = ">>> #{options["perpage"]} more >>>"
params["page"] = page.to_i
end

# Construct params, to send in POST request
# Construct params, to include as GET params
options.each do |key, value|
name = key.gsub("_", "-")
# If this is the range, remap old values to new ones
Expand Down Expand Up @@ -753,15 +755,11 @@ def search(options = {})
end
end

# Construct the search URL with GET params
search_get_params = URI.encode_www_form(params)
search_url = "/search/?#{search_get_params}"
# Get search response
raw = @cache.add("url:search:#{params}") do
response = post("/search/", params)
raise FAStatusError.new(fa_url("search/"), response.message) unless response.is_a?(Net::HTTPSuccess)

response.body
end
# Parse search results
html = Nokogiri::HTML(raw)
html = fetch(search_url)
# Get search results. Even a search with no matches gives this div.
results = html.at_css("#search-results")
# If form fails to submit, this div will not be there.
Expand Down Expand Up @@ -1318,7 +1316,7 @@ def fetch(path, extra_cookie = nil, as_guest: false)
rescue OpenURI::HTTPError => e
$http_errors.increment(labels: { page_type: page_type })
# Detect and handle known errors
if e.io.status[0] == "403" || e.io.status[0] == "503"
if e.io.status[0] == "403" || e.io.status[0] == "503" || e.io.status[0] == "400"
raw = e.io.read
html = Nokogiri::HTML(raw.encode("UTF-8", invalid: :replace, undef: :replace).delete("\000"))

Expand All @@ -1334,6 +1332,18 @@ def fetch(path, extra_cookie = nil, as_guest: false)
$slowdown_errors.increment(labels: { page_type: page_type })
raise FASlowdownError.new(url)
end

# Handle user not found errors
if e.io.status[0] == "400"
head = html.xpath("//head//title").first
if head.content == "System Error"
error_msg = html.at_css("table.maintable td.alt1 font").content
# Handle user profile not found, and user not found on journal listing
if error_msg.include?("This user cannot be found") || error_msg.include?("User not found!")
raise FANoUserError.new(url)
end
end
end
end
# Retry some types of error
if e.io.status[0] == "502" || e.io.status[0] == "520"
Expand Down Expand Up @@ -1428,6 +1438,9 @@ def check_errors(html, url)
if maintable_content.include?("has voluntarily disabled access to their account and all of its contents.")
raise FAAccountDisabledError.new(url)
end
if maintable_content.include?("Access has been disabled to the account and contents of user")
raise FAAccountDisabledError.new(url)
end

# Handle user not existing (this version of the error is raised by watchers lists and galleries)
if maintable_content.include?("Provided username not found in the database.") ||
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/check_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
RSpec::Matchers.define :be_valid_date_and_match_iso do |iso_string|
match do |date_string|
expect(date_string).not_to be_blank
expect(date_string).to match(/[A-Z][a-z]{2} [0-9]+([a-z]{2})?, [0-9]{4},? [0-9]{2}:[0-9]{2}( ?[AP]M)?/)
expect(date_string).to match(/[A-Z][a-z]{2,} [0-9]+([a-z]{2})?, [0-9]{4},? [0-9]{2}:[0-9]{2}(:[0-9]{2})?( ?[AP]M)?/)
expect(iso_string).not_to be_blank
expect(iso_string).to eql(Time.parse("#{date_string} UTC").iso8601)
end
Expand Down
11 changes: 6 additions & 5 deletions tests/integration/fa_parsing_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@
expect(sub[:category]).not_to be_blank
expect(sub[:theme]).not_to be_blank
expect(sub[:species]).not_to be_blank
expect(sub[:gender]).not_to be_blank
expect(sub[:gender]).to be_blank
expect(sub[:favorites]).to match(/[0-9]+/)
expect(sub[:favorites].to_i).to be_positive
expect(sub[:comments]).to match(/[0-9]+/)
Expand All @@ -486,7 +486,7 @@
expect(sub[:resolution]).not_to be_blank
expect(sub[:rating]).not_to be_blank
expect(sub[:keywords]).to be_instance_of Array
expect(sub[:keywords]).to eql(%w[keyword1 keyword2 keyword3])
expect(sub[:keywords]).to eql(%w[keyword1 keyword2 keyword3 male])
end

it "fails when given non-existent submissions" do
Expand All @@ -497,7 +497,7 @@
sub_id = "16437648"
sub = @fa.submission(sub_id)
expect(sub[:keywords]).to be_instance_of Array
expect(sub[:keywords]).to eql(%w[keyword1 keyword2 keyword3])
expect(sub[:keywords]).to eql(%w[keyword1 keyword2 keyword3 male])
end

it "has identical description and description_body" do
Expand Down Expand Up @@ -657,7 +657,7 @@
expect(sub[:category]).not_to be_blank
expect(sub[:theme]).not_to be_blank
expect(sub[:species]).not_to be_blank
expect(sub[:gender]).not_to be_blank
expect(sub[:gender]).to be_blank
expect(sub[:favorites]).to match(/[0-9]+/)
expect(sub[:favorites].to_i).to be >= 0
expect(sub[:comments]).to match(/[0-9]+/)
Expand Down Expand Up @@ -757,7 +757,7 @@
expect(sub[:category]).not_to be_blank
expect(sub[:theme]).not_to be_blank
expect(sub[:species]).not_to be_blank
expect(sub[:gender]).not_to be_blank
expect(sub[:gender]).to be_blank
expect(sub[:favorites]).to match(/[0-9]+/)
expect(sub[:favorites].to_i).to be_positive
expect(sub[:comments]).to match(/[0-9]+/)
Expand Down Expand Up @@ -861,6 +861,7 @@
end

it "handles non existent journal header" do
skip "Skipped: Current [2025-12-11] FA bug prevents footer from showing if header is unset"
journal_id = "9185944"
journal = @fa.journal(journal_id)
expect(journal[:title]).to eql("Testing journals")
Expand Down
49 changes: 35 additions & 14 deletions tests/integration/search_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@

begin
  @fa.search(args)
# List the exception classes directly. `rescue [A, B] => e` hands Ruby an
# Array, which is not a class or module, so a TypeError would be raised
# instead of the error being matched and the search retried.
rescue FAStatusError, FASlowdownError => e
  # Re-raise the current error once the 5 allowed retries are used up.
  raise unless (retries += 1) <= 5

  puts "FA error on Search: #{e}, retry #{retries} in #{wait_between_tries} second(s)..."
  sleep(wait_between_tries)
  retry
end
Expand Down Expand Up @@ -69,10 +69,12 @@
results1 = search_with_retry({ "q" => "YCH" })
expect(results1).to be_instance_of Array
expect(results1).not_to be_empty
expect(results1.length).to be >= 20
# Get page 2
results2 = search_with_retry({ "q" => "YCH", "page" => "2" })
expect(results2).to be_instance_of Array
expect(results2).not_to be_empty
expect(results2.length).to be >= 20
# Check they're different enough
expect(results1).to be_different_results_to(results2)
end
Expand Down Expand Up @@ -121,12 +123,14 @@
end

it "defaults to ordering by date desc" do
results = search_with_retry({ "q" => "YCH", "perpage" => "72" })
expect(results).to be_instance_of Array
expect(results).not_to be_empty
results_date = search_with_retry({ "q" => "YCH", "perpage" => "72", "order_by" => "date" })
results = search_with_retry({ "q" => "YCH", "perpage" => "24" })
expect(results).to be_instance_of Array
expect(results).not_to be_empty
expect(results.length).to be >= 20
results_date = search_with_retry({ "q" => "YCH", "perpage" => "24", "order_by" => "date" })
expect(results_date).to be_instance_of Array
expect(results_date).not_to be_empty
expect(results_date.length).to be >= 20

# Check they're similar enough
expect(results).to be_similar_results_to(results_date)
Expand All @@ -141,16 +145,21 @@

it "can search by relevancy and popularity, which give a different order to date" do
  # Run one search per ordering (date, then relevancy, then popularity),
  # checking each returns at least 20 results before the next search runs.
  by_date, by_relevancy, by_popularity = %w[date relevancy popularity].map do |ordering|
    found = search_with_retry({ "q" => "YCH", "perpage" => "24", "order_by" => ordering })
    expect(found.length).to be >= 20
    found
  end

  # Compare the orderings pairwise, in the same cycle as before:
  # date vs relevancy, relevancy vs popularity, popularity vs date.
  [[by_date, by_relevancy], [by_relevancy, by_popularity], [by_popularity, by_date]].each do |left, right|
    expect(left).to be_different_results_to(right)
  end
end

it "can specify order direction as ascending" do
  # Search ascending first, then descending; each must return at least 20 results.
  ascending, descending = %w[asc desc].map do |direction|
    listing = search_with_retry({ "q" => "YCH", "perpage" => "24", "order_direction" => direction })
    expect(listing.length).to be >= 20
    listing
  end

  # Reversing the direction should produce a different set of results.
  expect(ascending).to be_different_results_to(descending)
end

Expand All @@ -177,10 +186,14 @@
end

it "can specify search mode for the terms in the query" do
extended_or_results = search_with_retry({ "q" => "deer | lion", "perpage" => 72 })
extended_and_results = search_with_retry({ "q" => "deer & lion", "perpage" => 72 })
or_results = search_with_retry({ "q" => "deer lion", "perpage" => 72, "mode" => "any" })
and_results = search_with_retry({ "q" => "deer lion", "perpage" => 72, "mode" => "all" })
extended_or_results = search_with_retry({ "q" => "deer | lion", "perpage" => 24 })
expect(extended_or_results.length).to be >= 20
extended_and_results = search_with_retry({ "q" => "deer & lion", "perpage" => 24 })
expect(extended_and_results.length).to be >= 20
or_results = search_with_retry({ "q" => "deer lion", "perpage" => 24, "mode" => "any" })
expect(or_results.length).to be >= 20
and_results = search_with_retry({ "q" => "deer lion", "perpage" => 24, "mode" => "all" })
expect(and_results.length).to be >= 20

expect(extended_and_results).to be_different_results_to(extended_or_results)
expect(and_results).to be_different_results_to(or_results)
Expand All @@ -191,7 +204,9 @@

it "can specify ratings to display, and honours that selection" do
only_adult = search_with_retry({ "q" => "ych", "perpage" => 24, "rating" => "adult" })
expect(only_adult.length).to be >= 20
only_sfw_or_mature = search_with_retry({ "q" => "ych", "perpage" => 24, "rating" => "mature,general" })
expect(only_sfw_or_mature.length).to be >= 20

expect(only_adult).to be_different_results_to(only_sfw_or_mature)

Expand Down Expand Up @@ -229,14 +244,20 @@
end

it "can specify a content type for results, only returns that content type" do
results_poem = search_with_retry({ "q" => "deer", "perpage" => 72, "type" => "poetry" })
results_photo = search_with_retry({ "q" => "deer", "perpage" => 72, "type" => "photo" })
results_poem = search_with_retry({ "q" => "deer", "perpage" => 24, "type" => "poetry" })
expect(results_poem.length).to be >= 20
results_photo = search_with_retry({ "q" => "deer", "perpage" => 24, "type" => "photo" })
expect(results_photo.length).to be >= 20

expect(results_photo).to be_different_results_to(results_poem)
end

it "can specify multiple content types for results, and only displays those types" do
results_image = search_with_retry({ "q" => "deer", "perpage" => 72, "type" => "photo,art" })
results_swf_music = search_with_retry({ "q" => "deer", "perpage" => 72, "type" => "flash,music" })
results_image = search_with_retry({ "q" => "deer", "perpage" => 24, "type" => "photo,art" })
expect(results_image.length).to be >= 20
results_swf_music = search_with_retry({ "q" => "deer", "perpage" => 24, "type" => "flash,music" })
expect(results_swf_music.length).to be >= 20

expect(results_image).to be_different_results_to(results_swf_music)
end

Expand Down
Loading