Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Changes
1.1.5 (TBD)
------------------

- MemoryFile implementation has been improved so that it can support multi-part
S3 downloads (#1926).
- Members of the Resampling enum with a value > 7 can only be used in warp
operations (#1930). We now raise a ResamplingAlgorithmError if they are used
in non-warp reads and writes.
Expand Down
8 changes: 6 additions & 2 deletions rasterio/_io.pxd
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
include "gdal.pxi"

cimport numpy as np

from rasterio._base cimport DatasetBase

include "gdal.pxi"


cdef class DatasetReaderBase(DatasetBase):
pass
Expand Down Expand Up @@ -32,6 +32,10 @@ cdef class InMemoryRaster:
cdef GDALRasterBandH band(self, int) except NULL


cdef class MemoryFileBase:
    # GDAL VSI handle for the in-memory file backing this object.
    cdef VSILFILE * _vsif


ctypedef np.uint8_t DTYPE_UBYTE_t
ctypedef np.uint16_t DTYPE_UINT16_t
ctypedef np.int16_t DTYPE_INT16_t
Expand Down
161 changes: 59 additions & 102 deletions rasterio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -834,10 +834,10 @@ def silence_errors():
CPLPopErrorHandler()


cdef class MemoryFileBase(object):
cdef class MemoryFileBase:
"""Base for a BytesIO-like class backed by an in-memory file."""

def __init__(self, file_or_bytes=None, filename=None, ext=''):
def __init__(self, file_or_bytes=None, dirname=None, filename=None, ext=''):
"""A file in an in-memory filesystem.

Parameters
Expand All @@ -849,8 +849,9 @@ cdef class MemoryFileBase(object):
ext : str
A file extension for the in-memory file under /vsimem. Ignored if
filename was provided.

"""
cdef VSILFILE *vsi_handle = NULL
cdef VSILFILE *fp = NULL

if file_or_bytes:
if hasattr(file_or_bytes, 'read'):
Expand All @@ -864,33 +865,37 @@ cdef class MemoryFileBase(object):
else:
initial_bytes = b''

# Make an in-memory directory specific to this dataset to help organize
# auxiliary files.
self._dirname = dirname or str(uuid4())
VSIMkdir("/vsimem/{0}".format(self._dirname).encode("utf-8"), 0666)

if filename:
# GDAL's SRTMHGT driver requires the filename to be "correct" (match
# the bounds being written)
self.name = '/vsimem/{0}'.format(filename)
self.name = "/vsimem/{0}/{1}".format(self._dirname, filename)
else:
# GDAL 2.1 requires a .zip extension for zipped files.
self.name = '/vsimem/{0}.{1}'.format(uuid4(), ext.lstrip('.'))
self.name = "/vsimem/{0}/{0}.{1}".format(self._dirname, ext.lstrip('.'))

self._path = self.name.encode('utf-8')
self._pos = 0
self.closed = False

self._initial_bytes = initial_bytes
cdef unsigned char *buffer = self._initial_bytes

if self._initial_bytes:
self._vsif = VSIFileFromMemBuffer(
self._path, buffer, len(self._initial_bytes), 0)
self.mode = "r"

vsi_handle = VSIFileFromMemBuffer(
self._path, buffer, len(self._initial_bytes), 0)
else:
self._vsif = VSIFOpenL(self._path, "w+")
self.mode = "w+"

if vsi_handle == NULL:
raise IOError(
"Failed to create in-memory file using initial bytes.")
if self._vsif == NULL:
raise IOError("Failed to open in-memory file.")

if VSIFCloseL(vsi_handle) != 0:
raise IOError(
"Failed to properly close in-memory file.")
self.closed = False

def exists(self):
"""Test if the in-memory file exists.
Expand All @@ -899,18 +904,10 @@ cdef class MemoryFileBase(object):
-------
bool
True if the in-memory file exists.
"""
cdef VSILFILE *fp = NULL
cdef const char *cypath = self._path

with nogil:
fp = VSIFOpenL(cypath, 'r')

if fp != NULL:
VSIFCloseL(fp)
return True
else:
return False
"""
cdef VSIStatBufL st_buf
return VSIStatL(self._path, &st_buf) == 0

def __len__(self):
"""Length of the file's buffer in number of bytes.
Expand All @@ -921,105 +918,65 @@ cdef class MemoryFileBase(object):
"""
return self.getbuffer().size

def getbuffer(self):
"""Return a view on bytes of the file."""
cdef unsigned char *buffer = NULL
cdef vsi_l_offset buffer_len = 0
cdef np.uint8_t [:] buff_view

buffer = VSIGetMemFileBuffer(self._path, &buffer_len, 0)

if buffer == NULL or buffer_len == 0:
buff_view = np.array([], dtype='uint8')
else:
buff_view = <np.uint8_t[:buffer_len]>buffer
return buff_view

def close(self):
    """Close MemoryFile and release allocated memory.

    Closes the open VSI handle (if any), unlinks the in-memory file,
    removes the per-dataset in-memory directory and marks the object
    as closed. Calling it again on an already closed object is safe.
    """
    # Close the handle before unlinking so nothing is left open on the
    # file while it is being removed.
    if self._vsif != NULL:
        VSIFCloseL(self._vsif)
    self._vsif = NULL

    VSIUnlink(self._path)

    # The directory was created as "/vsimem/<dirname>"; it must be removed
    # using the same full path. A bare dirname would be resolved against
    # the real filesystem instead of the in-memory one.
    VSIRmdir("/vsimem/{0}".format(self._dirname).encode("utf-8"))

    # Release the initial bytes only after the file is gone: the in-memory
    # file was created over this buffer without taking ownership of it.
    self._pos = 0
    self._initial_bytes = None
    self.closed = True

def read(self, size=-1):
"""Read size bytes from MemoryFile."""
cdef VSILFILE *fp = NULL
# Return no bytes immediately if the position is at or past the
# end of the file.
length = len(self)

if self._pos >= length:
self._pos = length
return b''
def seek(self, offset, whence=0):
    """Change the position of the in-memory file's handle.

    Parameters
    ----------
    offset : int
        Byte offset, interpreted relative to `whence`.
    whence : int, optional
        Origin of the seek, passed through unchanged to VSIFSeekL.
        Default: 0.

    Returns
    -------
    int
        The value returned by VSIFSeekL (0 on success, -1 on failure).

    Raises
    ------
    IOError
        If the file has been closed.
    """
    # close() resets the handle to NULL; never hand a NULL handle to GDAL.
    if self._vsif == NULL:
        raise IOError("I/O operation on closed file.")
    return VSIFSeekL(self._vsif, offset, whence)

if size == -1:
size = length - self._pos
def tell(self):
if self._vsif != NULL:
return VSIFTellL(self._vsif)
else:
size = min(size, length - self._pos)
return 0

cdef unsigned char *buffer = <unsigned char *>CPLMalloc(size)
def read(self, size=-1):
"""Read size bytes from MemoryFile."""
cdef bytes result
cdef unsigned char *buffer = NULL
cdef vsi_l_offset buffer_len = 0

fp = VSIFOpenL(self._path, 'r')
if size < 0:
buffer = VSIGetMemFileBuffer(self._path, &buffer_len, 0)
size = buffer_len

try:
fp = exc_wrap_vsilfile(fp)
if VSIFSeekL(fp, self._pos, 0) < 0:
raise IOError(
"Failed to seek to offset %s in %s.",
self._pos, self.name)
buffer = <unsigned char *>CPLMalloc(size)

objects_read = VSIFReadL(buffer, 1, size, fp)
try:
objects_read = VSIFReadL(buffer, 1, size, self._vsif)
result = <bytes>buffer[:objects_read]

finally:
VSIFCloseL(fp)
CPLFree(buffer)

self._pos += len(result)
return result

def seek(self, offset, whence=0):
"""Seek to position in MemoryFile."""
if whence == 0:
pos = offset
elif whence == 1:
pos = self._pos + offset
elif whence == 2:
pos = len(self) - offset
if pos < 0:
raise ValueError("negative seek position: {}".format(pos))
if pos > len(self):
raise ValueError("seek position past end of file: {}".format(pos))
self._pos = pos
return self._pos

def tell(self):
"""Tell current position in MemoryFile."""
return self._pos

def write(self, data):
"""Write data bytes to MemoryFile"""
cdef VSILFILE *fp = NULL
cdef const unsigned char *view = <bytes>data
n = len(data)

if not self.exists():
fp = exc_wrap_vsilfile(VSIFOpenL(self._path, 'w'))
else:
fp = exc_wrap_vsilfile(VSIFOpenL(self._path, 'r+'))
if VSIFSeekL(fp, self._pos, 0) < 0:
raise IOError(
"Failed to seek to offset %s in %s.", self._pos, self.name)

result = VSIFWriteL(view, 1, n, fp)
VSIFFlushL(fp)
VSIFCloseL(fp)

self._pos += result
result = VSIFWriteL(view, 1, n, self._vsif)
VSIFFlushL(self._vsif)
return result

def getbuffer(self):
    """Return a uint8 view on the bytes of the in-memory file.

    An empty array-backed view is returned when GDAL reports no buffer
    or a zero-length buffer for the file's path.
    """
    cdef unsigned char *data = NULL
    cdef vsi_l_offset nbytes = 0
    cdef np.uint8_t [:] view

    # Final argument 0: look at the buffer without seizing it from GDAL.
    data = VSIGetMemFileBuffer(self._path, &nbytes, 0)

    if data != NULL and nbytes != 0:
        view = <np.uint8_t[:nbytes]>data
    else:
        view = np.array([], dtype='uint8')
    return view


cdef class DatasetWriterBase(DatasetReaderBase):
"""Read-write access to raster data and metadata
Expand Down
11 changes: 9 additions & 2 deletions rasterio/gdal.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,16 @@ cdef extern from "cpl_string.h" nogil:
const char* CPLParseNameValue(const char *pszNameValue, char **ppszKey)


# Opaque declaration of the C `stat` struct: no fields are exposed, the name
# only needs to exist so it can be used as a type in other declarations.
cdef extern from "sys/stat.h" nogil:
    struct stat:
        pass


cdef extern from "cpl_vsi.h" nogil:

ctypedef int vsi_l_offset
ctypedef FILE VSILFILE
ctypedef stat VSIStatBufL

unsigned char *VSIGetMemFileBuffer(const char *path,
vsi_l_offset *data_len,
Expand All @@ -66,14 +72,15 @@ cdef extern from "cpl_vsi.h" nogil:
VSILFILE* VSIFOpenL(const char *path, const char *mode)
int VSIFCloseL(VSILFILE *fp)
int VSIUnlink(const char *path)

int VSIMkdir(const char *path, long mode)
int VSIRmdir(const char *path)
int VSIFFlushL(VSILFILE *fp)
size_t VSIFReadL(void *buffer, size_t nSize, size_t nCount, VSILFILE *fp)
int VSIFSeekL(VSILFILE *fp, vsi_l_offset nOffset, int nWhence)
vsi_l_offset VSIFTellL(VSILFILE *fp)
int VSIFTruncateL(VSILFILE *fp, vsi_l_offset nNewSize)
size_t VSIFWriteL(void *buffer, size_t nSize, size_t nCount, VSILFILE *fp)

int VSIStatL(const char *pszFilename, VSIStatBufL *psStatBuf)

cdef extern from "ogr_srs_api.h" nogil:

Expand Down
6 changes: 3 additions & 3 deletions rasterio/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class MemoryFile(MemoryFileBase):
'width': 791}

"""
def __init__(self, file_or_bytes=None, filename=None, ext=''):
def __init__(self, file_or_bytes=None, dirname=None, filename=None, ext=''):
"""Create a new file in memory

Parameters
Expand All @@ -102,7 +102,7 @@ def __init__(self, file_or_bytes=None, filename=None, ext=''):
MemoryFile
"""
super(MemoryFile, self).__init__(
file_or_bytes=file_or_bytes, filename=filename, ext=ext)
file_or_bytes=file_or_bytes, dirname=dirname, filename=filename, ext=ext)

@ensure_env
def open(self, driver=None, width=None, height=None, count=None, crs=None,
Expand All @@ -125,7 +125,7 @@ def open(self, driver=None, width=None, height=None, count=None, crs=None,

if self.closed:
raise IOError("I/O operation on closed file.")
if self.exists():
if len(self) > 0:
log.debug("VSI path: {}".format(mempath.path))
return DatasetReader(mempath, driver=driver, sharing=sharing, **kwargs)
else:
Expand Down
30 changes: 26 additions & 4 deletions tests/test_memoryfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ def rgb_data_and_profile(path_rgb_byte_tif):
return data, profile


def test_initial_empty():
    """A MemoryFile created without data is empty and positioned at 0."""
    with MemoryFile() as memfile:
        assert len(memfile) == 0
        view = memfile.getbuffer()
        assert len(view) == 0
        position = memfile.tell()
        assert position == 0


def test_initial_not_bytes():
"""Creating a MemoryFile from not bytes fails."""
with pytest.raises(TypeError):
Expand Down Expand Up @@ -116,6 +123,21 @@ def test_non_initial_bytes_in_two(rgb_file_bytes):
assert src.read().shape == (3, 718, 791)


def test_non_initial_bytes_in_two_reverse(rgb_file_bytes):
    """MemoryFile contents can be written tail first, then head, and opened.

    Demonstrates fix of #1926.
    """
    split = 600000
    head = rgb_file_bytes[:split]
    tail = rgb_file_bytes[split:]

    with MemoryFile() as memfile:
        # Write the tail at its final offset before any head bytes exist.
        memfile.seek(split)
        assert memfile.write(tail) == len(rgb_file_bytes) - split

        # Rewind and fill in the head.
        memfile.seek(0)
        assert memfile.write(head) == split

        with memfile.open() as src:
            assert src.driver == "GTiff"
            assert src.count == 3
            assert src.dtypes == ("uint8", "uint8", "uint8")
            assert src.read().shape == (3, 718, 791)


def test_no_initial_bytes(rgb_data_and_profile):
"""An empty MemoryFile can be opened and written into."""
data, profile = rgb_data_and_profile
Expand Down Expand Up @@ -264,10 +286,10 @@ def test_memfile_copyfiles(path_rgb_msk_byte_tif):
"""Multiple files can be copied to a MemoryFile using copyfiles"""
with rasterio.open(path_rgb_msk_byte_tif) as src:
src_basename = os.path.basename(src.name)
with MemoryFile(filename=src_basename) as memfile:
with MemoryFile(dirname="foo", filename=src_basename) as memfile:
copyfiles(src.name, memfile.name)
with memfile.open() as rgb2:
assert sorted(rgb2.files) == sorted(['/vsimem/{}'.format(src_basename), '/vsimem/{}.msk'.format(src_basename)])
assert sorted(rgb2.files) == sorted(['/vsimem/foo/{}'.format(src_basename), '/vsimem/foo/{}.msk'.format(src_basename)])


def test_multi_memfile(path_rgb_msk_byte_tif):
Expand All @@ -277,9 +299,9 @@ def test_multi_memfile(path_rgb_msk_byte_tif):
with open(path_rgb_msk_byte_tif + '.msk', 'rb') as msk_fp:
msk_bytes = msk_fp.read()

with MemoryFile(tif_bytes, filename='foo.tif') as tifmemfile, MemoryFile(msk_bytes, filename='foo.tif.msk') as mskmemfile:
with MemoryFile(tif_bytes, dirname="bar", filename='foo.tif') as tifmemfile, MemoryFile(msk_bytes, dirname="bar", filename='foo.tif.msk') as mskmemfile:
with tifmemfile.open() as src:
assert sorted(src.files) == sorted(['/vsimem/foo.tif', '/vsimem/foo.tif.msk'])
assert sorted(os.path.basename(fn) for fn in src.files) == sorted(['foo.tif', 'foo.tif.msk'])
assert src.mask_flag_enums == ([MaskFlags.per_dataset],) * 3


Expand Down