#!/usr/bin/env ruby

#-------------------------------------------------
# get_issuu.rb - retrieve all jpg's for a document
#-------------------------------------------------

# 1. Open the issuu.com document in your web browser, as usual
#    example: http://issuu.com/iwishart/docs/thedivinity
# 2. Read the document page count; set the $PAGES variable below
# 3. From the browser menu, choose View > Source
# 4. Do a text search for "documentId"
# 5. Copy a string such as "081230122554-f76b0df1e7464a149caf5158813252d9"
#    to the $PUB variable below
# 6. Execute the script:
#    ruby get_issuu.rb
#
# NOTE: fetch_pdf now derives the page count and documentId automatically
# from the document URL, so steps 2-5 are only needed for manual use.
require 'fileutils'
require 'open-uri'
require 'rmagick'
| 19 | + |
| 20 | + |
| 21 | +def fetch_pdf(url) |
| 22 | + |
| 23 | + username = url.split("/")[3] |
| 24 | + docname = url.split("/")[5] |
| 25 | + query_url = "http://api.issuu.com/query?action=issuu.document.get_anonymous&format=json&documentUsername=#{username}&name=#{docname}&jsonCallback=C&_1341928054865=" |
| 26 | + |
| 27 | + num_pages = open(query_url).read.split('pageCount":')[1].split(",")[0].to_i |
| 28 | + pub_hash = open(url).grep(/documentId/)[0].split("documentId=")[1].split('"')[0] |
| 29 | + |
| 30 | + for x in 1..num_pages do |
| 31 | + s_name = "page_#{x}.jpg" |
| 32 | + f_name = "tmp/page_#{"%03d" % x}.jpg" |
| 33 | + puts(Time.now.strftime('%Y-%m-%d %X') +" - Downloaded: "+ s_name +" >> "+ f_name) |
| 34 | + open(f_name,"wb").write(open("http://image.issuu.com/#{pub_hash}/jpg/#{s_name}").read) |
| 35 | + end |
| 36 | + puts("#{Time.now.strftime('%Y-%m-%d %X')} - All pages have been downloaded") |
| 37 | + |
| 38 | + Dir["tmp/*.jpg"].each { |filename| |
| 39 | + im = Magick::Image.read(filename) |
| 40 | + im[0].write(filename + ".pdf") |
| 41 | + } |
| 42 | + |
| 43 | + `pdftk tmp/*.pdf cat output issue.pdf` |
| 44 | + `rm tmp/*.jpg` |
| 45 | + `rm tmp/page*.pdf` |
| 46 | +end |
| 47 | + |
| 48 | + |
| 49 | +fetch_pdf("http://issuu.com/teemuarina/docs/biohackers_handbook-sleep_9f915fcce3c2fd") |
| 50 | + |
| 51 | + |
| 52 | + |
# Earlier manual settings and scratch notes (superseded by fetch_pdf):
#$PUB="120602085552-1d7234d1c80d4bc6986ed7468f19c3ca"
#$URL = "http://issuu.com/edicomedizioni/docs/az_04_x_issuu"
#$URL = "http://issuu.com/rwdmag/docs/digital_magazine_july"
#$PAGES=44

# puts open("http://issuu.com/edicomedizioni/docs/az_04_x_issuu").read.grep(/documentId/)

# sudo gem install pdf-toolkit --http-proxy http://localhost:3128
# sudo gem install pdf-merger --http-proxy http://localhost:3128

# mkdir pdf
# for file in $(ls *.jpg); do convert $file pdf/$file.pdf ; done
# pdftk *.pdf cat output join.pdf