Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 33 additions & 2 deletions kaggle/api/kaggle_api_extended.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from __future__ import print_function
import csv
from datetime import datetime
import time
import io
import json
import os
Expand Down Expand Up @@ -1567,7 +1568,7 @@ def dataset_create_new_cli(self,
else:
print('Dataset creation error: ' + result.error)

def download_file(self, response, outfile, quiet=True, chunk_size=1048576, resume=True):
    """ download a file to an output file based on a chunk size

        Parameters
        ==========
        response: the response to download (a streamed, non-preloaded
            urllib3-style response; must expose .headers, .read and
            .retries.history)
        outfile: the output file to download to
        quiet: suppress verbose output (default is True)
        chunk_size: the size of the chunk to stream
        resume: whether to resume an existing download
    """

    outpath = os.path.dirname(outfile)
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    # NOTE(review): assumes the server always sends Content-Length and
    # Last-Modified; a KeyError here means the endpoint changed — confirm.
    size = int(response.headers['Content-Length'])
    size_read = 0
    open_mode = 'wb'

    remote_date = datetime.strptime(response.headers['Last-Modified'],
                                    '%a, %d %b %Y %X %Z')
    remote_date_timestamp = time.mktime(remote_date.timetuple())

    if not quiet:
        print('Downloading ' + os.path.basename(outfile) + ' to ' +
              outpath)

    file_exists = os.path.isfile(outfile)
    resumable = response.headers.get('Accept-Ranges') == 'bytes'

    if resume and resumable and file_exists:
        size_read = os.path.getsize(outfile)
        # Bug fix: only send a Range request when the local file is a
        # strict prefix of the remote one. Requesting 'bytes=<size>-' for
        # an already-complete (or stale, oversized) local file makes the
        # server answer 416 Range Not Satisfiable; restart from scratch
        # in that case instead of appending to a bad file.
        if 0 < size_read < size:
            open_mode = 'ab'

            if not quiet:
                print("... resuming from %d bytes (%d bytes left) ..." % (size_read, size - size_read,))

            # Re-issue the request against the final (redirected) URL with
            # a Range header so the server sends only the missing suffix.
            # NOTE(review): assumes at least one redirect was recorded in
            # retries.history — IndexError if the URL is served directly;
            # verify against the API client's retry configuration.
            request_history = response.retries.history[0]
            response = self.api_client.request(
                request_history.method,
                request_history.redirect_location,
                headers={'Range': 'bytes=%d-' % (size_read,)},
                _preload_content=False
            )
        else:
            # Local file complete or larger than remote: redo the full
            # download with the original (non-ranged) response.
            size_read = 0

    with tqdm(total=size,
              initial=size_read,
              unit='B',
              unit_scale=True,
              unit_divisor=1024,
              disable=quiet) as pbar:
        with open(outfile, open_mode) as out:
            while True:
                data = response.read(chunk_size)
                if not data:
                    break
                out.write(data)
                # Each write bumps the file's mtime to "now"; stamp it
                # back to Last-Modified minus one second after every chunk
                # so an interrupted partial file is distinguishable from a
                # finished one (which gets the exact remote timestamp
                # below).
                os.utime(outfile, times=(remote_date_timestamp - 1, remote_date_timestamp - 1))
                # Bug fix: count the bytes actually read — the final read
                # usually returns fewer than chunk_size bytes.
                size_read = min(size, size_read + len(data))
                pbar.update(len(data))
    if not quiet:
        print('\n', end='')

    # Download finished: give the file the remote's exact timestamp so a
    # future resume check can tell it is complete and up to date.
    os.utime(outfile, times=(remote_date_timestamp, remote_date_timestamp))

def kernels_list(self,
page=1,
page_size=20,
Expand Down