From dcce27b394de59898480c9ed53cd890f995a93be Mon Sep 17 00:00:00 2001
From: Joshua Arnott <josh@snorfalorpagus.net>
Date: Sun, 21 Jan 2018 22:03:24 +0000
Subject: [PATCH 01/10] Initial commit of bulk reading into numpy arrays

---
 fiona/_vectorized.pyx    | 116 +++++++++++++++++++++++++++++++++++++++
 fiona/ogrext.pxd         |   8 +++
 fiona/ogrext.pyx         |   7 ---
 fiona/ogrext1.pxd        |   3 +-
 fiona/ogrext2.pxd        |   3 +-
 requirements.txt         |   1 +
 setup.py                 |  20 ++++++-
 tests/test_vectorized.py |  46 ++++++++++++++++
 8 files changed, 193 insertions(+), 11 deletions(-)
 create mode 100644 fiona/_vectorized.pyx
 create mode 100644 fiona/ogrext.pxd
 create mode 100644 tests/test_vectorized.py

diff --git a/fiona/_vectorized.pyx b/fiona/_vectorized.pyx
new file mode 100644
index 000000000..e0032964d
--- /dev/null
+++ b/fiona/_vectorized.pyx
@@ -0,0 +1,116 @@
+from .ogrext cimport Session, _deleteOgrFeature
+from .ogrext import FIELD_TYPES, FIELD_TYPES_MAP, OGRERR_NONE
+from ._shim cimport *
+
+import logging
+from six import text_type
+
+import numpy as np
+cimport numpy as np
+
+log = logging.getLogger(__name__)
+
+def read_vectorized(collection):
+    cdef Session session
+    cdef void * cogr_feature
+    cdef void * cogr_geometry
+    cdef int num_fields
+    cdef void * fdefn
+    cdef int feature_index
+    cdef int field_index
+    cdef char * field_name_c
+    cdef bytes field_name_bytes
+    cdef str field_name
+    cdef int i
+    cdef long long [:] arr_int
+    cdef double [:] arr_double
+    cdef char * wkt
+
+    session = collection.session
+    encoding = session._fileencoding
+
+    if session.cogr_layer == NULL:
+        raise ValueError("Null layer")
+
+    length = OGR_L_GetFeatureCount(session.cogr_layer, 0)
+
+    data_geometry = np.empty([length], dtype=object)
+    data_properties = {}
+
+    schema = session.get_schema()
+    for field_name, field_type in schema["properties"].items():
+        field_type, precision = field_type.split(":")
+        if field_type == "int":
+            data_properties[field_name] = np.empty([length], dtype=np.int64)
+        elif field_type == "float":
+            data_properties[field_name] = np.empty([length], dtype=np.float64)
+        elif field_type == "str":
+            data_properties[field_name] = np.empty([length], dtype=object)
+        else:
+            # TODO: other types (dates, bytes, boolean subtype)
+            raise TypeError("Unexpected field type: {}".format(field_type))
+
+    for feature_index in range(length):
+        # TODO: this isn't the correct way to iterate over features
+        cogr_feature = OGR_L_GetFeature(session.cogr_layer, feature_index)
+
+        num_fields = OGR_F_GetFieldCount(cogr_feature)
+        for field_index in range(num_fields):
+            fdefn = OGR_F_GetFieldDefnRef(cogr_feature, field_index)
+
+            # field name
+            field_name_c = OGR_Fld_GetNameRef(fdefn)
+            field_name_bytes = field_name_c
+            field_name = field_name_bytes.decode(encoding)
+
+            # field type
+            field_type_id = OGR_Fld_GetType(fdefn)
+            field_type_name = FIELD_TYPES[field_type_id]
+            field_type = FIELD_TYPES_MAP[field_type_name]
+
+            if field_type is int:
+                arr_int = data_properties[field_name]
+                if is_field_null(cogr_feature, field_index):
+                    # TODO: this isn't the correct way to handle NULL for ints
+                    arr_int[feature_index] = 0
+                else:
+                    arr_int[feature_index] = OGR_F_GetFieldAsInteger64(cogr_feature, field_index)
+            elif field_type is float:
+                arr_double = data_properties[field_name]
+                if is_field_null(cogr_feature, field_index):
+                    arr_double[feature_index] = np.nan
+                else:
+                    arr_double[feature_index] = OGR_F_GetFieldAsDouble(cogr_feature, field_index)
+            elif field_type is text_type:
+                if is_field_null(cogr_feature, field_index):
+                    value = None
+                else:
+                    try:
+                        value = OGR_F_GetFieldAsString(cogr_feature, field_index)
+                        value = value.decode(encoding)
+                    except UnicodeDecodeError:
+                        log.warning(
+                            "Failed to decode %s using %s codec", value, encoding)
+                arr = data_properties[field_name]
+                arr[feature_index] = value
+            else:
+                raise TypeError("Unexpected field type: {}".format(field_type))
+
+        # TODO: best way to return geometries for shapely?
+        cogr_geometry = OGR_F_GetGeometryRef(cogr_feature)
+        if cogr_geometry == NULL:
+            data_geometry[feature_index] = None
+        else:
+            result = OGR_G_ExportToWkt(cogr_geometry, &wkt)
+            if result != OGRERR_NONE:
+                raise ValueError("Failed to export geometry to WKT")
+            data_geometry[feature_index] = wkt
+
+        _deleteOgrFeature(cogr_feature)
+
+    features = {
+        "geometry": data_geometry,
+        "properties": data_properties,
+    }
+
+    return features
diff --git a/fiona/ogrext.pxd b/fiona/ogrext.pxd
new file mode 100644
index 000000000..66adb5587
--- /dev/null
+++ b/fiona/ogrext.pxd
@@ -0,0 +1,8 @@
+cdef class Session:
+    cdef void *cogr_ds
+    cdef void *cogr_layer
+    cdef object _fileencoding
+    cdef object _encoding
+    cdef object collection
+
+cdef _deleteOgrFeature(void *cogr_feature)
diff --git a/fiona/ogrext.pyx b/fiona/ogrext.pyx
index 4e3dcd666..029e18482 100644
--- a/fiona/ogrext.pyx
+++ b/fiona/ogrext.pyx
@@ -397,13 +397,6 @@ def featureRT(feature, collection):
 # Collection-related extension classes and functions
 
 cdef class Session:
-
-    cdef void *cogr_ds
-    cdef void *cogr_layer
-    cdef object _fileencoding
-    cdef object _encoding
-    cdef object collection
-
     def __init__(self):
         self.cogr_ds = NULL
         self.cogr_layer = NULL
diff --git a/fiona/ogrext1.pxd b/fiona/ogrext1.pxd
index 305ed505d..fed22f236 100644
--- a/fiona/ogrext1.pxd
+++ b/fiona/ogrext1.pxd
@@ -135,7 +135,8 @@ cdef extern from "ogr_api.h":
     void *  OGR_G_CreateGeometry (int wkbtypecode)
     void    OGR_G_DestroyGeometry (void *geometry)
     unsigned char *  OGR_G_ExportToJson (void *geometry)
-    void    OGR_G_ExportToWkb (void *geometry, int endianness, char *buffer)
+    OGRErr  OGR_G_ExportToWkb (void *geometry, int endianness, char *buffer)
+    OGRErr  OGR_G_ExportToWkt (void *geometry, char **wkt)
     int     OGR_G_GetCoordinateDimension (void *geometry)
     int     OGR_G_GetGeometryCount (void *geometry)
     unsigned char *  OGR_G_GetGeometryName (void *geometry)
diff --git a/fiona/ogrext2.pxd b/fiona/ogrext2.pxd
index 9eecd832a..544375375 100644
--- a/fiona/ogrext2.pxd
+++ b/fiona/ogrext2.pxd
@@ -196,7 +196,8 @@ cdef extern from "ogr_api.h":
     void *  OGR_G_CreateGeometry (int wkbtypecode)
     void    OGR_G_DestroyGeometry (void *geometry)
     unsigned char *  OGR_G_ExportToJson (void *geometry)
-    void    OGR_G_ExportToWkb (void *geometry, int endianness, char *buffer)
+    OGRErr  OGR_G_ExportToWkb (void *geometry, int endianness, char *buffer)
+    OGRErr  OGR_G_ExportToWkt (void *geometry, char **wkt)
     int     OGR_G_GetCoordinateDimension (void *geometry)
     int     OGR_G_GetGeometryCount (void *geometry)
     unsigned char *  OGR_G_GetGeometryName (void *geometry)
diff --git a/requirements.txt b/requirements.txt
index 563bac9ce..f42d07625 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ cligj>=0.4
 six>=1.7
 ordereddict
 munch
+numpy
diff --git a/setup.py b/setup.py
index 44ce8a513..b87116c7e 100644
--- a/setup.py
+++ b/setup.py
@@ -9,6 +9,13 @@
 from setuptools import setup
 from setuptools.extension import Extension
 
+# NumPy is required for vectorized submodule
+try:
+    import numpy as np
+except ImportError:
+    has_numpy = False
+else:
+    has_numpy = True
 
 # Use Cython if available.
 try:
@@ -180,6 +187,9 @@ def run(self):
     libraries=libraries,
     extra_link_args=extra_link_args)
 
+if has_numpy:
+    ext_options["include_dirs"] = [np.get_include()]
+
 ext_options_cpp = ext_options.copy()
 # GDAL 2.3+ requires C++11
 if sys.platform == "win32":
@@ -212,14 +222,17 @@ def run(self):
             shutil.copy('fiona/_shim2.pyx', 'fiona/_shim.pyx')
             shutil.copy('fiona/_shim2.pxd', 'fiona/_shim.pxd')
 
-    ext_modules = cythonize([
+    ext_modules = [
         Extension('fiona._geometry', ['fiona/_geometry.pyx'], **ext_options),
         Extension('fiona._transform', ['fiona/_transform.pyx'], **ext_options_cpp),
         Extension('fiona._crs', ['fiona/_crs.pyx'], **ext_options),
         Extension('fiona._drivers', ['fiona/_drivers.pyx'], **ext_options),
         Extension('fiona._err', ['fiona/_err.pyx'], **ext_options),
         Extension('fiona._shim', ['fiona/_shim.pyx'], **ext_options),
-        Extension('fiona.ogrext', ['fiona/ogrext.pyx'], **ext_options)])
+        Extension('fiona.ogrext', ['fiona/ogrext.pyx'], **ext_options)]
+    if has_numpy:
+        ext_modules.append(Extension('fiona._vectorized', ['fiona/_vectorized.pyx'], **ext_options))
+    ext_modules = cythonize(ext_modules)
 
 # If there's no manifest template, as in an sdist, we just specify .c files.
 elif "clean" not in sys.argv:
@@ -231,6 +244,9 @@ def run(self):
         Extension('fiona._err', ['fiona/_err.c'], **ext_options),
         Extension('fiona.ogrext', ['fiona/ogrext.c'], **ext_options),
     ]
+    if has_numpy:
+        ext_modules.append(Extension('fiona._vectorized', ['fiona/_vectorized.c'], **ext_options))
+    ext_modules = cythonize(ext_modules)
 
     if gdal_major_version == 1:
         log.info("Building Fiona for gdal 1.x: {0}".format(gdalversion))
diff --git a/tests/test_vectorized.py b/tests/test_vectorized.py
new file mode 100644
index 000000000..a534baa63
--- /dev/null
+++ b/tests/test_vectorized.py
@@ -0,0 +1,46 @@
+import pytest
+import fiona
+from six import integer_types, string_types
+try:
+    from fiona._vectorized import read_vectorized
+    has_vectorized = True
+except ImportError:
+    has_vectorized = False
+
+if has_vectorized:
+    import numpy as np
+    from numpy.testing import assert_allclose
+
+requires_vectorized = pytest.mark.skipif(not has_vectorized, reason="Vectorized submodule not available")
+
+@requires_vectorized
+def test_read_vectorized(path_coutwildrnp_shp):
+    with fiona.open(path_coutwildrnp_shp, "r") as collection:
+        features = read_vectorized(collection)
+
+        assert len(features["geometry"]) == 67
+        assert features["geometry"].dtype == object
+        assert features["geometry"][0].decode("ascii").startswith("POLYGON (")
+        assert features["geometry"][-1].decode("ascii").startswith("POLYGON (")
+        # TODO: better checks for geometry
+
+        # check number of properties
+        assert len(features["properties"]) == len(collection.schema["properties"])
+
+        # float
+        assert features["properties"]["PERIMETER"].dtype == np.float64
+        assert features["properties"]["PERIMETER"].shape == (67,)
+        assert_allclose(features["properties"]["PERIMETER"][0], 1.22107)
+        assert_allclose(features["properties"]["PERIMETER"][-1], 0.120627)
+
+        # integer
+        assert features["properties"]["WILDRNP020"].dtype == np.int64
+        assert features["properties"]["WILDRNP020"].shape == (67,)
+        assert features["properties"]["WILDRNP020"][0] == 332
+        assert features["properties"]["WILDRNP020"][-1] == 511
+
+        # string: compare the dtype itself (isinstance(x, object) is always true)
+        assert features["properties"]["NAME"].dtype == object
+        assert features["properties"]["NAME"].shape == (67,)
+        assert features["properties"]["NAME"][0] == "Mount Naomi Wilderness"
+        assert features["properties"]["NAME"][-1] == "Mesa Verde Wilderness"

From b7aec85ddde8202dd3f9f46cb2078d622359f8b7 Mon Sep 17 00:00:00 2001
From: Joshua Arnott <josh@snorfalorpagus.net>
Date: Sun, 21 Jan 2018 22:54:23 +0000
Subject: [PATCH 02/10] Removed incorrect call to cythonize()

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index b87116c7e..0451368de 100644
--- a/setup.py
+++ b/setup.py
@@ -246,7 +246,6 @@ def run(self):
     ]
     if has_numpy:
         ext_modules.append(Extension('fiona._vectorized', ['fiona/_vectorized.c'], **ext_options))
-    ext_modules = cythonize(ext_modules)
 
     if gdal_major_version == 1:
         log.info("Building Fiona for gdal 1.x: {0}".format(gdalversion))

From 4742cad034e6a5f171e0e9c1c8c738d6daca2029 Mon Sep 17 00:00:00 2001
From: Joshua Arnott <josh@snorfalorpagus.net>
Date: Mon, 22 Jan 2018 12:50:10 +0000
Subject: [PATCH 03/10] Append NumPy include dir, not overwrite

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0451368de..274005a36 100644
--- a/setup.py
+++ b/setup.py
@@ -188,7 +188,7 @@ def run(self):
     extra_link_args=extra_link_args)
 
 if has_numpy:
-    ext_options["include_dirs"] = [np.get_include()]
+    ext_options["include_dirs"].append(np.get_include())
 
 ext_options_cpp = ext_options.copy()
 # GDAL 2.3+ requires C++11

From 395c0e5b30a87a0851bd9160865eed1df35470c8 Mon Sep 17 00:00:00 2001
From: Joshua Arnott <josh@snorfalorpagus.net>
Date: Mon, 22 Jan 2018 13:37:30 +0000
Subject: [PATCH 04/10] Removed type from field name

---
 fiona/_vectorized.pyx | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fiona/_vectorized.pyx b/fiona/_vectorized.pyx
index e0032964d..fc271d23f 100644
--- a/fiona/_vectorized.pyx
+++ b/fiona/_vectorized.pyx
@@ -20,7 +20,6 @@ def read_vectorized(collection):
     cdef int field_index
     cdef char * field_name_c
     cdef bytes field_name_bytes
-    cdef str field_name
     cdef int i
     cdef long long [:] arr_int
     cdef double [:] arr_double

From f8fecfcdf88c30912a92248d9cfe0cc890c2221f Mon Sep 17 00:00:00 2001
From: Joshua Arnott <josh@snorfalorpagus.net>
Date: Sat, 24 Mar 2018 13:47:44 +0000
Subject: [PATCH 05/10] Support ignore fields/geometry in read_vectorized

---
 fiona/_vectorized.pyx    | 35 +++++++++++++++++++++++++----------
 tests/test_vectorized.py | 24 +++++++++++++++++-------
 2 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/fiona/_vectorized.pyx b/fiona/_vectorized.pyx
index fc271d23f..c0757bb59 100644
--- a/fiona/_vectorized.pyx
+++ b/fiona/_vectorized.pyx
@@ -33,11 +33,24 @@ def read_vectorized(collection):
 
     length = OGR_L_GetFeatureCount(session.cogr_layer, 0)
 
-    data_geometry = np.empty([length], dtype=object)
     data_properties = {}
 
+    if collection.ignore_fields:
+        ignore_fields = set(collection.ignore_fields)
+    else:
+        ignore_fields = set()
+
+    if collection.ignore_geometry:
+        ignore_geometry = True
+        data_geometry = None
+    else:
+        ignore_geometry = False
+        data_geometry = np.empty([length], dtype=object)
+
     schema = session.get_schema()
     for field_name, field_type in schema["properties"].items():
+        if field_name in ignore_fields:
+            continue
         field_type, precision = field_type.split(":")
         if field_type == "int":
             data_properties[field_name] = np.empty([length], dtype=np.int64)
@@ -61,6 +74,8 @@ def read_vectorized(collection):
             field_name_c = OGR_Fld_GetNameRef(fdefn)
             field_name_bytes = field_name_c
             field_name = field_name_bytes.decode(encoding)
+            if field_name in ignore_fields:
+                continue
 
             # field type
             field_type_id = OGR_Fld_GetType(fdefn)
@@ -95,15 +110,15 @@ def read_vectorized(collection):
             else:
                 raise TypeError("Unexpected field type: {}".format(field_type))
 
-        # TODO: best way to return geometries for shapely?
-        cogr_geometry = OGR_F_GetGeometryRef(cogr_feature)
-        if cogr_geometry == NULL:
-            data_geometry[feature_index] = None
-        else:
-            result = OGR_G_ExportToWkt(cogr_geometry, &wkt)
-            if result != OGRERR_NONE:
-                raise ValueError("Failed to export geometry to WKT")
-            data_geometry[feature_index] = wkt
+        if not ignore_geometry:
+            cogr_geometry = OGR_F_GetGeometryRef(cogr_feature)
+            if cogr_geometry == NULL:
+                data_geometry[feature_index] = None
+            else:
+                result = OGR_G_ExportToWkt(cogr_geometry, &wkt)
+                if result != OGRERR_NONE:
+                    raise ValueError("Failed to export geometry to WKT")
+                data_geometry[feature_index] = wkt
 
         _deleteOgrFeature(cogr_feature)
 
diff --git a/tests/test_vectorized.py b/tests/test_vectorized.py
index a534baa63..5d62e9089 100644
--- a/tests/test_vectorized.py
+++ b/tests/test_vectorized.py
@@ -3,17 +3,12 @@
 from six import integer_types, string_types
 try:
     from fiona._vectorized import read_vectorized
-    has_vectorized = True
 except ImportError:
-    has_vectorized = False
-
-if has_vectorized:
+    pytestmark = pytest.mark.skip
+else:
     import numpy as np
     from numpy.testing import assert_allclose
 
-requires_vectorized = pytest.mark.skipif(not has_vectorized, reason="Vectorized submodule not available")
-
-@requires_vectorized
 def test_read_vectorized(path_coutwildrnp_shp):
     with fiona.open(path_coutwildrnp_shp, "r") as collection:
         features = read_vectorized(collection)
@@ -44,3 +39,18 @@ def test_read_vectorized(path_coutwildrnp_shp):
         assert features["properties"]["NAME"].shape == (67,)
         assert features["properties"]["NAME"][0] == "Mount Naomi Wilderness"
         assert features["properties"]["NAME"][-1] == "Mesa Verde Wilderness"
+
+def test_ignore_fields(path_coutwildrnp_shp):
+    with fiona.open(path_coutwildrnp_shp, ignore_fields=["NAME"]) as collection:
+        features = read_vectorized(collection)
+
+        assert "PERIMETER" in features["properties"]
+        assert "WILDRNP020" in features["properties"]
+        assert "NAME" not in features["properties"]
+
+        assert features["geometry"] is not None
+
+def test_ignore_geometry(path_coutwildrnp_shp):
+    with fiona.open(path_coutwildrnp_shp, ignore_geometry=True) as collection:
+        features = read_vectorized(collection)
+        assert features["geometry"] is None

From 6784292af0d41e2d99ba2bfd9cdfb8868d21ffeb Mon Sep 17 00:00:00 2001
From: Joshua Arnott <josh@snorfalorpagus.net>
Date: Sat, 24 Mar 2018 13:48:36 +0000
Subject: [PATCH 06/10] Ignore fiona/_vectorized.c

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index b16558592..f6847a21e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,6 +76,7 @@ fiona/_shim2.c
 fiona/_shim22.c
 fiona/_shim.pxd
 fiona/_shim.pyx
+fiona/_vectorized.c
 tests/data/coutwildrnp.json
 tests/data/coutwildrnp.tar
 tests/data/coutwildrnp.zip

From 58bcba91bf8e7bf9532a0f59e6b6f308fe1ab17c Mon Sep 17 00:00:00 2001
From: Joshua Arnott <josh@snorfalorpagus.net>
Date: Sat, 24 Mar 2018 14:30:13 +0000
Subject: [PATCH 07/10] Support binary fields in read_vectorized

---
 fiona/_vectorized.pyx      | 27 ++++++++++++++++---
 tests/test_binary_field.py | 55 ++++++++++++++++++++------------------
 tests/test_vectorized.py   | 16 +++++++++++
 3 files changed, 69 insertions(+), 29 deletions(-)

diff --git a/fiona/_vectorized.pyx b/fiona/_vectorized.pyx
index c0757bb59..f2d4f8162 100644
--- a/fiona/_vectorized.pyx
+++ b/fiona/_vectorized.pyx
@@ -21,6 +21,8 @@ def read_vectorized(collection):
     cdef char * field_name_c
     cdef bytes field_name_bytes
     cdef int i
+    cdef int l
+    cdef long long fid
     cdef long long [:] arr_int
     cdef double [:] arr_double
     cdef char * wkt
@@ -33,6 +35,7 @@ def read_vectorized(collection):
 
     length = OGR_L_GetFeatureCount(session.cogr_layer, 0)
 
+    data_fids = np.empty([length], dtype=object)
     data_properties = {}
 
     if collection.ignore_fields:
@@ -51,20 +54,31 @@ def read_vectorized(collection):
     for field_name, field_type in schema["properties"].items():
         if field_name in ignore_fields:
             continue
-        field_type, precision = field_type.split(":")
+        if ":" in field_type:
+            field_type, precision = field_type.split(":")
+        else:
+            precision = None
         if field_type == "int":
             data_properties[field_name] = np.empty([length], dtype=np.int64)
         elif field_type == "float":
             data_properties[field_name] = np.empty([length], dtype=np.float64)
         elif field_type == "str":
             data_properties[field_name] = np.empty([length], dtype=object)
+        elif field_type == "bytes":
+            data_properties[field_name] = np.empty([length], dtype=object)
         else:
             # TODO: other types (dates, bytes, boolean subtype)
             raise TypeError("Unexpected field type: {}".format(field_type))
 
+    OGR_L_ResetReading(session.cogr_layer)
     for feature_index in range(length):
-        # TODO: this isn't the correct way to iterate over features
-        cogr_feature = OGR_L_GetFeature(session.cogr_layer, feature_index)
+        cogr_feature = OGR_L_GetNextFeature(session.cogr_layer)
+
+        if cogr_feature == NULL:
+            raise ValueError("Failed to read feature {}".format(feature_index))
+
+        fid = OGR_F_GetFID(cogr_feature)
+        data_fids[feature_index] = str(fid)
 
         num_fields = OGR_F_GetFieldCount(cogr_feature)
         for field_index in range(num_fields):
@@ -83,6 +97,7 @@ def read_vectorized(collection):
             field_type = FIELD_TYPES_MAP[field_type_name]
 
             if field_type is int:
+                # TODO: support boolean subtype
                 arr_int = data_properties[field_name]
                 if is_field_null(cogr_feature, field_index):
                     # TODO: this isn't the correct way to handle NULL for ints
@@ -107,6 +122,11 @@ def read_vectorized(collection):
                             "Failed to decode %s using %s codec", value, encoding)
                 arr = data_properties[field_name]
                 arr[feature_index] = value
+            # TODO: support date dtype
+            elif field_type is bytes:
+                data = OGR_F_GetFieldAsBinary(cogr_feature, field_index, &l)
+                arr = data_properties[field_name]
+                arr[feature_index] = data[:l]
             else:
                 raise TypeError("Unexpected field type: {}".format(field_type))
 
@@ -123,6 +143,7 @@ def read_vectorized(collection):
         _deleteOgrFeature(cogr_feature)
 
     features = {
+        "id": data_fids,
         "geometry": data_geometry,
         "properties": data_properties,
     }
diff --git a/tests/test_binary_field.py b/tests/test_binary_field.py
index fc4376675..6548435e6 100644
--- a/tests/test_binary_field.py
+++ b/tests/test_binary_field.py
@@ -9,6 +9,33 @@
 from collections import OrderedDict
 from .conftest import requires_gpkg
 
+def write_binary_gpkg(path):
+    meta = {
+        "driver": "GPKG",
+        "schema": {
+            "geometry": "Point",
+            "properties": OrderedDict([
+                ("name", "str"),
+                ("data", "bytes"),
+            ])
+        }
+    }
+
+    # create some binary data to encode
+    data = binascii.a2b_hex(b"deadbeef")
+
+    # write the binary data to a BLOB field
+    with fiona.open(path, "w", **meta) as dst:
+        feature = {
+            "geometry": {"type": "Point", "coordinates": ((0,0))},
+            "properties": {
+                "name": "test",
+                "data": data
+            }
+        }
+        dst.write(feature)
+
+
 class TestBinaryField(unittest.TestCase):
     def setUp(self):
         self.tempdir = tempfile.mkdtemp()
@@ -18,33 +45,9 @@ def tearDown(self):
     
     @requires_gpkg
     def test_binary_field(self):
-        meta = {
-            "driver": "GPKG",
-            "schema": {
-                "geometry": "Point",
-                "properties": OrderedDict([
-                    ("name", "str"),
-                    ("data", "bytes"),
-                ])
-            }
-        }
-        
-        # create some binary data to encode
-        data = binascii.a2b_hex(b"deadbeef")
-        
-        # write the binary data to a BLOB field
         filename = os.path.join(self.tempdir, "binary_test.gpkg")
-        with fiona.open(filename, "w", **meta) as dst:
-            feature = {
-                "geometry": {"type": "Point", "coordinates": ((0,0))},
-                "properties": {
-                    "name": "test",
-                    "data": data
-                }
-            }
-            dst.write(feature)
-        
-        del(data)
+
+        write_binary_gpkg(filename)
         
         # read the data back and check consistency
         with fiona.open(filename, "r") as src:
diff --git a/tests/test_vectorized.py b/tests/test_vectorized.py
index 5d62e9089..fdd8ef2f1 100644
--- a/tests/test_vectorized.py
+++ b/tests/test_vectorized.py
@@ -1,5 +1,6 @@
 import pytest
 import fiona
+import binascii
 from six import integer_types, string_types
 try:
     from fiona._vectorized import read_vectorized
@@ -8,6 +9,8 @@
 else:
     import numpy as np
     from numpy.testing import assert_allclose
+from .conftest import requires_gpkg
+from .test_binary_field import write_binary_gpkg
 
 def test_read_vectorized(path_coutwildrnp_shp):
     with fiona.open(path_coutwildrnp_shp, "r") as collection:
@@ -54,3 +57,16 @@ def test_ignore_geometry(path_coutwildrnp_shp):
     with fiona.open(path_coutwildrnp_shp, ignore_geometry=True) as collection:
         features = read_vectorized(collection)
         assert features["geometry"] is None
+
+@requires_gpkg
+def test_binary_field(tmpdir):
+    filename = str(tmpdir.join("test.gpkg"))
+    write_binary_gpkg(filename)
+
+    with fiona.open(filename, "r") as collection:
+        print(collection.schema)
+        features = read_vectorized(collection)
+
+        assert(features["properties"]["name"][0] == "test")
+        data = features["properties"]["data"][0]
+        assert(binascii.b2a_hex(data) == b"deadbeef")

From b58ff97dac49144596a66f8ef5ff833ba0666421 Mon Sep 17 00:00:00 2001
From: Joshua Arnott <josh@snorfalorpagus.net>
Date: Sat, 24 Mar 2018 15:24:42 +0000
Subject: [PATCH 08/10] Support datetime fields in read_vectorized

---
 fiona/_vectorized.pyx    | 30 +++++++++++++++++++++++++++++-
 tests/test_vectorized.py | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/fiona/_vectorized.pyx b/fiona/_vectorized.pyx
index f2d4f8162..ab31cb526 100644
--- a/fiona/_vectorized.pyx
+++ b/fiona/_vectorized.pyx
@@ -1,9 +1,11 @@
 from .ogrext cimport Session, _deleteOgrFeature
 from .ogrext import FIELD_TYPES, FIELD_TYPES_MAP, OGRERR_NONE
 from ._shim cimport *
+from fiona.rfc3339 import FionaDateType, FionaDateTimeType, FionaTimeType
 
 import logging
 from six import text_type
+import datetime
 
 import numpy as np
 cimport numpy as np
@@ -22,6 +24,13 @@ def read_vectorized(collection):
     cdef bytes field_name_bytes
     cdef int i
     cdef int l
+    cdef int y = 0
+    cdef int m = 0
+    cdef int d = 0
+    cdef int hh = 0
+    cdef int mm = 0
+    cdef int ss = 0
+    cdef int tz = 0
     cdef long long fid
     cdef long long [:] arr_int
     cdef double [:] arr_double
@@ -66,6 +75,13 @@ def read_vectorized(collection):
             data_properties[field_name] = np.empty([length], dtype=object)
         elif field_type == "bytes":
             data_properties[field_name] = np.empty([length], dtype=object)
+        elif field_type == "date":
+            data_properties[field_name] = np.empty([length], dtype='datetime64[D]')
+        elif field_type == "time":
+            # numpy has no dtype for time without date
+            data_properties[field_name] = np.empty([length], dtype=object)
+        elif field_type == "datetime":
+            data_properties[field_name] = np.empty([length], dtype='datetime64[s]')
         else:
             # TODO: other types (dates, bytes, boolean subtype)
             raise TypeError("Unexpected field type: {}".format(field_type))
@@ -122,7 +138,19 @@ def read_vectorized(collection):
                             "Failed to decode %s using %s codec", value, encoding)
                 arr = data_properties[field_name]
                 arr[feature_index] = value
-            # TODO: support date dtype
+            elif field_type in (FionaDateType, FionaTimeType, FionaDateTimeType):
+                arr = data_properties[field_name]
+                retval = OGR_F_GetFieldAsDateTime(
+                    cogr_feature, field_index, &y, &m, &d, &hh, &mm, &ss, &tz)
+                if not retval:
+                    arr[feature_index] = None
+                else:
+                    if field_type is FionaDateType:
+                        arr[feature_index] = datetime.date(y, m, d).isoformat()
+                    elif field_type is FionaTimeType:
+                        arr[feature_index] = datetime.time(hh, mm, ss).isoformat()
+                    else:
+                        arr[feature_index] = datetime.datetime(y, m, d, hh, mm, ss).isoformat()
             elif field_type is bytes:
                 data = OGR_F_GetFieldAsBinary(cogr_feature, field_index, &l)
                 arr = data_properties[field_name]
diff --git a/tests/test_vectorized.py b/tests/test_vectorized.py
index fdd8ef2f1..17adf6053 100644
--- a/tests/test_vectorized.py
+++ b/tests/test_vectorized.py
@@ -70,3 +70,36 @@ def test_binary_field(tmpdir):
         assert(features["properties"]["name"][0] == "test")
         data = features["properties"]["data"][0]
         assert(binascii.b2a_hex(data) == b"deadbeef")
+
+@requires_gpkg  # ESRI Shapefile doesn't support datetime fields
+def test_datetime_fields(tmpdir):
+    filename = str(tmpdir.join("test.gpkg"))
+    schema = {
+        "geometry": "Point",
+        "properties": [
+            ("date", "date"),
+            ("datetime", "datetime"),
+            ("nulldt", "datetime"),
+        ]
+    }
+    with fiona.open(filename, "w", driver="GPKG", schema=schema) as dst:
+        feature = {
+            "geometry": None,
+            "properties": {
+                "date": "2018-03-24",
+                "datetime": "2018-03-24T15:06:01",
+                "nulldt": None,
+            }
+        }
+        dst.write(feature)
+
+    with fiona.open(filename, "r") as src:
+        features = read_vectorized(src)
+
+        assert features["properties"]["date"].dtype.name == "datetime64[D]"
+        assert features["properties"]["datetime"].dtype.name == "datetime64[s]"
+        assert features["properties"]["nulldt"].dtype.name == "datetime64[s]"
+
+        assert features["properties"]["date"][0] == np.datetime64("2018-03-24")
+        assert features["properties"]["datetime"][0] == np.datetime64("2018-03-24T15:06:01")
+        assert str(features["properties"]["nulldt"][0]) == "NaT"

From 35a4308502e4760ec06e757dfb5c0d466526f14c Mon Sep 17 00:00:00 2001
From: Joshua Arnott <josh@snorfalorpagus.net>
Date: Sat, 24 Mar 2018 15:42:12 +0000
Subject: [PATCH 09/10] Comment tweaks

---
 fiona/_vectorized.pyx | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fiona/_vectorized.pyx b/fiona/_vectorized.pyx
index ab31cb526..fdc86f170 100644
--- a/fiona/_vectorized.pyx
+++ b/fiona/_vectorized.pyx
@@ -83,7 +83,6 @@ def read_vectorized(collection):
         elif field_type == "datetime":
             data_properties[field_name] = np.empty([length], dtype='datetime64[s]')
         else:
-            # TODO: other types (dates, bytes, boolean subtype)
             raise TypeError("Unexpected field type: {}".format(field_type))
 
     OGR_L_ResetReading(session.cogr_layer)
@@ -116,7 +115,7 @@ def read_vectorized(collection):
                 # TODO: support boolean subtype
                 arr_int = data_properties[field_name]
                 if is_field_null(cogr_feature, field_index):
-                    # TODO: this isn't the correct way to handle NULL for ints
+                    # TODO: is this the best way to handle NULL values for int?
                     arr_int[feature_index] = 0
                 else:
                     arr_int[feature_index] = OGR_F_GetFieldAsInteger64(cogr_feature, field_index)

From dc4ab3d7ba472716933e8700e0ef6fb37b9bf8be Mon Sep 17 00:00:00 2001
From: Joshua Arnott <josh@snorfalorpagus.net>
Date: Sat, 24 Mar 2018 16:02:21 +0000
Subject: [PATCH 10/10] Support for WKB as geometry type in read_vectorized

---
 fiona/_vectorized.pyx    | 12 +++++++++++-
 tests/test_vectorized.py | 10 ++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/fiona/_vectorized.pyx b/fiona/_vectorized.pyx
index fdc86f170..84fa37e02 100644
--- a/fiona/_vectorized.pyx
+++ b/fiona/_vectorized.pyx
@@ -1,6 +1,7 @@
 from .ogrext cimport Session, _deleteOgrFeature
 from .ogrext import FIELD_TYPES, FIELD_TYPES_MAP, OGRERR_NONE
 from ._shim cimport *
+from libc.stdlib cimport malloc, free
 from fiona.rfc3339 import FionaDateType, FionaDateTimeType, FionaTimeType
 
 import logging
@@ -12,7 +13,7 @@ cimport numpy as np
 
 log = logging.getLogger(__name__)
 
-def read_vectorized(collection):
+def read_vectorized(collection, use_wkb=False):
     cdef Session session
     cdef void * cogr_feature
     cdef void * cogr_geometry
@@ -35,6 +36,7 @@ def read_vectorized(collection):
     cdef long long [:] arr_int
     cdef double [:] arr_double
     cdef char * wkt
+    cdef char * wkb
 
     session = collection.session
     encoding = session._fileencoding
@@ -161,6 +163,21 @@ def read_vectorized(collection):
             cogr_geometry = OGR_F_GetGeometryRef(cogr_feature)
             if cogr_geometry == NULL:
                 data_geometry[feature_index] = None
+            elif use_wkb:
+                # Keep the buffer size in its own name: `length` already
+                # holds the layer's feature count.
+                wkb_size = OGR_G_WkbSize(cogr_geometry)
+                wkb = <char*>malloc(sizeof(char)*wkb_size)
+                if wkb == NULL:
+                    raise MemoryError("Failed to allocate WKB buffer")
+                try:
+                    result = OGR_G_ExportToWkb(cogr_geometry, 1, wkb)
+                    if result != OGRERR_NONE:
+                        raise ValueError("Failed to export geometry to WKB")
+                    data_geometry[feature_index] = wkb[:wkb_size]
+                finally:
+                    # Release the buffer on the error path as well.
+                    free(wkb)
             else:
                 result = OGR_G_ExportToWkt(cogr_geometry, &wkt)
                 if result != OGRERR_NONE:
diff --git a/tests/test_vectorized.py b/tests/test_vectorized.py
index 17adf6053..248f724ea 100644
--- a/tests/test_vectorized.py
+++ b/tests/test_vectorized.py
@@ -103,3 +103,13 @@ def test_datetime_fields(tmpdir):
         assert features["properties"]["date"][0] == np.datetime64("2018-03-24")
         assert features["properties"]["datetime"][0] == np.datetime64("2018-03-24T15:06:01")
         assert str(features["properties"]["nulldt"][0]) == "NaT"
+
+def test_wkb(path_coutwildrnp_shp):
+    with fiona.open(path_coutwildrnp_shp, "r") as collection:
+        features = read_vectorized(collection, use_wkb=True)
+
+    geometry = features["geometry"][0]
+    assert geometry[0:1] == b"\x01"  # little endian
+    assert geometry[1:5] == b"\x03\x00\x00\x00"  # polygon
+    assert geometry[5:9] == b"\x01\x00\x00\x00"  # 1 ring
+    assert len(geometry) == 1325