Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
67e41ef
Fixed #367
dimitri-yatsenko Oct 12, 2017
f89bf0e
Merge branch 'master' of https://github.com/datajoint/datajoint-python
dimitri-yatsenko Oct 12, 2017
159cb51
fixed dependencies
dimitri-yatsenko Oct 12, 2017
bb88117
implemented the Union operator
dimitri-yatsenko Oct 13, 2017
9703126
bugfix in populate
dimitri-yatsenko Oct 13, 2017
4fa1abd
fixed a bug in server-side insert with missing attributes
dimitri-yatsenko Oct 13, 2017
d6b9f2a
work on cascading delete
dimitri-yatsenko Oct 19, 2017
c35ed7a
Merge branch 'master' of https://github.com/dimitri-yatsenko/datajoin…
dimitri-yatsenko Oct 19, 2017
7299058
fixed #375 -- added the max_calls argument to populate
dimitri-yatsenko Oct 19, 2017
3baf487
simplified insert from select
dimitri-yatsenko Oct 20, 2017
4969c36
consolidating the use of hashes
dimitri-yatsenko Oct 20, 2017
ce774be
added module `external` for handling external storage
dimitri-yatsenko Oct 20, 2017
ad650b0
minor fix in external, work in progress
dimitri-yatsenko Oct 20, 2017
860d04b
minor
dimitri-yatsenko Oct 20, 2017
2144b0f
changed how external storage is configured
dimitri-yatsenko Oct 20, 2017
cfc1e21
minor bug
dimitri-yatsenko Oct 20, 2017
0195cfc
minor formatting
dimitri-yatsenko Oct 20, 2017
d821bcf
removed the JobManager class -- it was never used
dimitri-yatsenko Oct 20, 2017
183b324
bugfix from previous commit
dimitri-yatsenko Oct 20, 2017
6164eb5
renamed the display_progress argument in populate()
dimitri-yatsenko Oct 20, 2017
96f0d20
Merge branch 'master' of https://github.com/dimitri-yatsenko/datajoin…
dimitri-yatsenko Oct 20, 2017
b37175b
rolled back unintended changes in delete
dimitri-yatsenko Oct 20, 2017
9688e5d
fixed typo JobsTable -> JobTable
dimitri-yatsenko Oct 20, 2017
bcc0048
fixed bugs introduced in recent commits
dimitri-yatsenko Oct 20, 2017
28208c2
Merge branch 'master' of https://github.com/datajoint/datajoint-python
dimitri-yatsenko Oct 20, 2017
04b1829
fixed test_requirements
dimitri-yatsenko Oct 20, 2017
1c8e071
fixed indentation
dimitri-yatsenko Oct 20, 2017
bb11c1b
minor
dimitri-yatsenko Oct 20, 2017
9fc36b4
minor
dimitri-yatsenko Oct 20, 2017
9fca90f
minor code refactor in schema.py
dimitri-yatsenko Oct 22, 2017
7069dad
improved the error message in autopopulate
dimitri-yatsenko Oct 22, 2017
3bc28c5
correction to previous commit
dimitri-yatsenko Oct 22, 2017
a3d1a53
Merge branch 'master' of https://github.com/datajoint/datajoint-python
dimitri-yatsenko Oct 24, 2017
d9cca7b
Merge branch 'master' of https://github.com/dimitri-yatsenko/datajoin…
dimitri-yatsenko Oct 24, 2017
1b37ee9
typo
dimitri-yatsenko Oct 24, 2017
cf3cf0c
made `make` and acceptable name for the populate callback (issue #387)
dimitri-yatsenko Oct 25, 2017
85b6587
small bugfix for rare cases with multiple inheritance
dimitri-yatsenko Oct 27, 2017
f936073
minor fix
dimitri-yatsenko Oct 27, 2017
2b622c1
Merge branch 'master' of https://github.com/dimitri-yatsenko/datajoin…
dimitri-yatsenko Oct 27, 2017
5270d80
minor fixes
dimitri-yatsenko Oct 29, 2017
85502b0
Merge branch 'master' of https://github.com/dimitri-yatsenko/datajoin…
dimitri-yatsenko Oct 30, 2017
bdba20d
undid an unintended change in delete
dimitri-yatsenko Oct 30, 2017
919efba
implemented declaration of external fields
dimitri-yatsenko Oct 30, 2017
e30dfb2
added tests for external storage
dimitri-yatsenko Oct 30, 2017
259950d
minor cleanup
dimitri-yatsenko Oct 30, 2017
3a7f416
minor cleanup
dimitri-yatsenko Oct 30, 2017
3594c67
added external storage tests
dimitri-yatsenko Nov 5, 2017
7eb114c
Merge branch 'master' of https://github.com/datajoint/datajoint-python
dimitri-yatsenko Nov 5, 2017
4d1af79
Completed basic implementation of external storage.
dimitri-yatsenko Nov 5, 2017
4a58a9a
ERD does not show dependencies on external storage
dimitri-yatsenko Nov 5, 2017
1ca0b09
again, the ERD no longer includes references to ~external
dimitri-yatsenko Nov 5, 2017
add950f
fixed #328: the jobs table now records the error stack
dimitri-yatsenko Nov 5, 2017
90c021e
fixes for #328
dimitri-yatsenko Nov 5, 2017
782a9a5
fixed #388 -- a more elegant way to skip duplicates in insert
dimitri-yatsenko Nov 5, 2017
383595d
followup to previous commit
dimitri-yatsenko Nov 5, 2017
794dc47
made insert from query more consistent with insert from variables
dimitri-yatsenko Nov 5, 2017
5ab3381
fixed issue #381 -- better error messages for syntax errors in declar…
dimitri-yatsenko Nov 5, 2017
4b2671e
typo from previous commit
dimitri-yatsenko Nov 6, 2017
9a0b902
set the strict mode at connection time
dimitri-yatsenko Nov 6, 2017
7302571
set sql_mode in connection
dimitri-yatsenko Nov 6, 2017
569881a
updated the sql_mode
dimitri-yatsenko Nov 6, 2017
86c2480
added tests for union and for external storage. Other minor fixes bas…
dimitri-yatsenko Nov 13, 2017
6f7c6bd
improved documentation and error messages for fetch and fetch1. Fixe…
dimitri-yatsenko Nov 14, 2017
8018a9d
added tests for external storage
dimitri-yatsenko Nov 15, 2017
7627569
changed the shape of the computed nodes in the ERD to elipse to avoid…
dimitri-yatsenko Nov 15, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
added tests for external storage
  • Loading branch information
dimitri-yatsenko committed Oct 30, 2017
commit e30dfb24de8f0493e3375584ae9c269c80f16a32
2 changes: 1 addition & 1 deletion datajoint/autopopulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def _job_key(self, key):
return key

def populate(self, *restrictions, suppress_errors=False, reserve_jobs=False,
order="original", limit=None, max_calls=None, display_progress=False):
order="original", limit=None, max_calls=None, display_progress=False):
"""
rel.populate() calls rel.make(key) for every primary key in self.key_source
for which there is not already a tuple in rel.
Expand Down
23 changes: 9 additions & 14 deletions datajoint/declare.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from . import DataJointError, config

STORE_NAME_LENGTH = 8
HASH_DATA_TYPE = 'varchar(43)'
STORE_HASH_LENGTH = 43
HASH_DATA_TYPE = 'char(51)'

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -191,25 +192,19 @@ def compile_attribute(line, in_key, foreign_key_sql):
else:
# process externally stored attribute
if in_key:
raise DataJointError('External attributes cannot be primary.')
raise DataJointError('External attributes cannot be primary in:\n%s' % line)
if not match['default'] in ('DEFAULT NULL', 'NOT NULL'):
raise DataJointError('The only acceptable default value for an external field is null in:\n%s' % line)
if match['type'] not in config:
raise DataJointError('The external store `{type}` is not configured.'.format(**match))
if len(match['type']) > STORE_NAME_LENGTH + STORE_NAME_LENGTH + 1:
raise DataJointError(
'The external store name `{type}` is too long. Must be <={max_len} characters.'.format(
max_len=STORE_NAME_LENGTH, **match))
sql = """
`_{name}` char({store_name_len}) COMMENT "{type}",
`{name}` {hash_type} {default}{comment}'
""".format(
comment=' COMMENT "{comment}"' if match['comment'] else '',
hash_type=HASH_DATA_TYPE,
store_name_len=STORE_NAME_LENGTH,
**match)
sql = "`_{name}` {hash_type} {default} COMMENT {comment:type}'".format(
hash_type=HASH_DATA_TYPE, **match)
foreign_key_sql.append(
"""
FOREIGN KEY (`_{name}`,`name`) REFERENCES {{external_table}} (`store`, `hash`)
ON UPDATE RESTRICT ON DELETE RESTRICT
""".format(**match))
"FOREIGN KEY (`_{name}`) REFERENCES {{external_table}} (`hash`) "
"ON UPDATE RESTRICT ON DELETE RESTRICT".format(**match))

return match['name'], sql, is_external
58 changes: 31 additions & 27 deletions datajoint/external.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .hash import long_hash
from .blob import pack, unpack
from .base_relation import BaseRelation
from .declare import HASH_DATA_TYPE, STORE_NAME_LENGTH
from .declare import STORE_HASH_LENGTH, STORE_NAME_LENGTH


class ExternalTable(BaseRelation):
Expand All @@ -17,8 +17,6 @@ def __init__(self, arg, database=None):
# copy constructor
self.database = arg.database
self._connection = arg._connection
self._definition = arg._definition
self._user = arg._user
return
super().__init__()
self.database = database
Expand All @@ -30,13 +28,12 @@ def __init__(self, arg, database=None):
def definition(self):
return """
# external storage tracking
store :char({store_name_length}) # the name of external store
hash :{hash_data_type} # the hash of stored object
hash :char({hash_len}) # the hash of stored object + store name
---
count = 1 :int # reference count
size :bigint unsigned # size of object in bytes
timestamp=CURRENT_TIMESTAMP :timestamp # automatic timestamp
""".format(store_name_length=STORE_NAME_LENGTH, hash_data_type=HASH_DATA_TYPE)
""".format(hash_len=STORE_HASH_LENGTH + STORE_NAME_LENGTH)

@property
def table_name(self):
Expand All @@ -46,17 +43,21 @@ def put(self, store, obj):
"""
put an object in external store
"""
try:
spec = config[store]
except KeyError:
raise DataJointError('Storage {store} is not configured'.format(store=store))

# serialize object
blob = pack(obj)
hash = long_hash(blob)
hash = long_hash(blob) + store[len('external-'):]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

store[len('external-'):] seems incongruent w/r/t line 88: store = 'external' + ('-' if store else '') + store which could conceivably allow only external; that said using only 'external' as a specifier is already blocked in declare.py so I'm not sure how important this is.. perhaps line 88 should throw an error rather than allowing 'external' only?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is to allow either external or external-somethin where somethin cannot exceed 8 characters. external by itself should not be blocked. If it is, it's my mistake and I will fix it and add a test for it. Many people will only have one external storage and they will just use external.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is store[len('external-'):] incongruent? You can do a[n:] where n exceeds the size of a no problem.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok - I think on first read I was taking this to mean there was an explicit expectation that the slice would return something to append to the hash, which was then "confirmed" when I saw that declare.py requires the external- convention (via the 'isidentifier' check on the secondary portion) and throws exceptions suggesting this is a requirement.

Not allowing the bare 'external' in some cases (here, declare.py) but allowing it in another (line 88) is why I proposed one was incongruent with respect to the other.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if 'external' alone is is allowed, it looks like the store_name parts from declare (line 191+) need to be altered -

>>> storestr='external'
>>> storestr.split('-')
['external']
>>> store_name = storestr.split('-')
>>> store_name[1:]
[]
>>> ''.join(store_name[1:])
''
>>> ''.join(store_name[1:]).isidentifier()
False

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. It should be isidentifier or empty.


# write object
try:
spec = config['external.%s' % store]
protocol = spec['protocol']
except KeyError:
raise DataJointError('storage.%s is not configured' % store)
raise DataJointError('Storage {store} config is missing the protocol field'.format(store=store))

if spec['protocol'] == 'file':
if protocol == 'file':
folder = os.path.join(spec['location'], self.database)
full_path = os.path.join(folder, hash)
if not os.path.isfile(full_path):
Expand All @@ -66,32 +67,38 @@ def put(self, store, obj):
except FileNotFoundError:
os.makedirs(folder)
with open(full_path, 'wb') as f:
f.write(blob)
f.write(blob)
else:
raise DataJointError('Unknown external storage %s' % store)
raise DataJointError('Unknown external storage protocol {protocol} for {store}'.format(
store=store, protocol=protocol))

# insert tracking info
self.connection.query(
"""
INSERT INTO `{db}`.`{table}` (store, hash, size) VALUES ({store}, {hash}, {size})
ON DUPLICATE KEY count=count+1, timestamp=CURRENT_TIMESTAMP""".format(
"INSERT INTO `{db}`.`{table}` (hash, size) VALUES ('{hash}', {size}) "
"ON DUPLICATE KEY UPDATE count=count+1, timestamp=CURRENT_TIMESTAMP".format(
db=self.database,
table=self.table_name,
store=store,
hash=hash,
hash=hash, # append the name of the store to the hash
size=len(blob)))
return hash

def get(self, store, hash):
def get(self, hash):
"""
get an object from external store
"""
store = hash[STORE_HASH_LENGTH:]
store = 'external' + ('-' if store else '') + store
try:
spec = config['external.%s' % store]
spec = config[store]
except KeyError:
raise DataJointError('storage.%s is not configured' % store)

if spec['protocol'] == 'file':
try:
protocol = spec['protocol']
except KeyError:
raise DataJointError('Storage {store} config is missing the protocol field'.format(store=store))

if protocol == 'file':
full_path = os.path.join(spec['location'], self.database, hash)
try:
with open(full_path, 'rb') as f:
Expand All @@ -103,16 +110,13 @@ def get(self, store, hash):

return unpack(blob)

def remove(self, store, hash):
def remove(self, hash):
"""
delete an object from external store
"""
# decrement count
self.connection.query(
"""
UPDATE `{db}`.`{table}` count=count-1 WHERE store={store} and hash={hash}
""".format(
'UPDATE `{db}`.`{table}` SET count=count-1 WHERE hash="{hash}"'.format(
db=self.database,
table=self.table_name,
store=store,
hash=hash))
hash=hash))
2 changes: 0 additions & 2 deletions datajoint/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,6 @@ def __getitem__(self, key):

def __setitem__(self, key, value):
logger.log(logging.INFO, u"Setting {0:s} to {1:s}".format(str(key), str(value)))
if isinstance(value, collections.Mapping):
raise ValueError("Nested settings are not supported!")
if validators[key](value):
self._conf[key] = value
else:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_blob.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np
from datajoint.blob import pack, unpack
from numpy.testing import assert_array_equal, raises
from numpy.testing import assert_array_equal
from nose.tools import assert_equal, assert_true


Expand Down
54 changes: 54 additions & 0 deletions tests/test_external.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import numpy as np
from numpy.testing import assert_array_equal
from nose.tools import assert_true, assert_equal
import datajoint as dj
from datajoint.external import ExternalTable

from . import PREFIX, CONN_INFO


dj.config['external'] = {
'protocol': 'file',
'location': 'dj-store/external'}

dj.config['external-raw'] = {
'protocol': 'file',
'location': 'dj-store/raw'}

dj.config['external-compute'] = {
'protocol': 's3',
'location': '/datajoint-projects/test',
'user': 'dimitri',
'token': '2e05709792545ce'
}

dj.config['cache'] = {
'protocol': 'file',
'location': '/media/dimitri/ExtraDrive1/dj-store/cache'}


schema = dj.schema(PREFIX + '_external_test1', locals(), connection=dj.conn(**CONN_INFO))


def test_external_put_get_remove():
"""
external storage put and get and remove
"""
ext = ExternalTable(schema.connection, schema.database)
input_ = np.random.randn(3, 7, 8)
count = 7
extra = 3
for i in range(count):
hash1 = ext.put('external-raw', input_)
for i in range(extra):
hash2 = ext.put('external-raw', np.random.randn(4, 3, 2))

assert_true(hash1 in ext.fetch('hash'))
assert_equal(count, (ext & {'hash': hash1}).fetch1('count'))
assert_equal(len(ext), 1 + extra)

output_ = ext.get(hash1)
assert_array_equal(input_, output_)

ext.remove(hash1)
assert_equal(count-1, (ext & {'hash': hash1}).fetch1('count'))
13 changes: 0 additions & 13 deletions tests/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,6 @@ def test_singleton2():
assert_true(conf['dummy.val'] == 2, 'Config does not behave like a singleton.')


@raises(ValueError)
def test_nested_check():
"""Testing nested rejection"""
dummy = {'dummy.testval': {'notallowed': 2}}
dj.config.update(dummy)


@raises(dj.DataJointError)
def test_validator():
"""Testing validator"""
Expand Down Expand Up @@ -73,12 +66,6 @@ def test_repr():
assert_equal(repr(dj.config), pprint.pformat(dj.config._conf, indent=4))


@raises(ValueError)
def test_nested_check2():
"""Testing nested dictionary rejection"""
dj.config['dummy'] = {'dummy2':2}


def test_save():
"""Testing save of config"""
tmpfile = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
Expand Down