Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
bcfb173
attachments are saved as intact files
dimitri-yatsenko Sep 13, 2019
18366e6
reform attachments and filepath
dimitri-yatsenko Sep 13, 2019
11d7d9b
complete implementation of external storage except for `clean`
dimitri-yatsenko Sep 16, 2019
22347ae
Merge branch 'dev' of https://github.com/datajoint/datajoint-python i…
dimitri-yatsenko Sep 16, 2019
7e7c183
refactor external storage
dimitri-yatsenko Sep 18, 2019
8aa1543
complete refactoring of external storage for version 0.12
dimitri-yatsenko Sep 20, 2019
a2d6f9a
rename attribute `basename` to `attachment_name` in external table
dimitri-yatsenko Sep 20, 2019
2c11a65
add __repr__ to ExternalMapping
dimitri-yatsenko Sep 20, 2019
3a1c6ab
external files are not copied if stage and store are the same
dimitri-yatsenko Sep 20, 2019
478b36a
make external tables require setting the `delete_external_files` argu…
dimitri-yatsenko Sep 20, 2019
bb1deab
update CHANGELOG
dimitri-yatsenko Sep 23, 2019
e2636ed
fix Python 3.4 compatibility
dimitri-yatsenko Sep 23, 2019
11a1f93
fix Python 3.4 and 3.5 compatibility
dimitri-yatsenko Sep 23, 2019
165f795
dropped support for Python 3.4
dimitri-yatsenko Sep 23, 2019
c18af74
Merge branch 'dev' of https://github.com/datajoint/datajoint-python i…
dimitri-yatsenko Sep 26, 2019
48147f1
minor changes in error messages
dimitri-yatsenko Oct 1, 2019
2e26c4a
Merge branch 'dev' of https://github.com/datajoint/datajoint-python i…
dimitri-yatsenko Oct 3, 2019
0c4fd1c
Update to pathlib in test init.
guzman-raphael Oct 3, 2019
6ba86e0
Update test_blob_migrate to be compatible for WIN10.
guzman-raphael Oct 3, 2019
43a5126
Fix WIN10 compatibility with KeyboardInterrupt and SystemExit excepti…
guzman-raphael Oct 3, 2019
c7ca34c
Merge pull request #4 from guzman-raphael/dimitri-attach
dimitri-yatsenko Oct 3, 2019
5c99e37
Fix WIN10 filepath to store as posix and fetch as user's platform.
guzman-raphael Oct 4, 2019
e2c3f23
Fix relpath for Python3.5.
guzman-raphael Oct 4, 2019
93aeefc
Fix copytree for Python3.5.
guzman-raphael Oct 4, 2019
20719ae
Fix typo.
guzman-raphael Oct 4, 2019
7ca0099
Fix for Python3.5.
guzman-raphael Oct 4, 2019
f3ffd63
Update coveralls.
guzman-raphael Oct 4, 2019
bb1b40f
Update coverall env vars.
guzman-raphael Oct 4, 2019
f05c50d
Merge pull request #5 from guzman-raphael/win-filepath
dimitri-yatsenko Oct 5, 2019
298efed
add environment variable DJ_SUPPORT_FILEPATH_MANAGEMENT to enable/dis…
dimitri-yatsenko Oct 7, 2019
e609fbe
Merge branch 'attach' of https://github.com/dimitri-yatsenko/datajoin…
dimitri-yatsenko Oct 7, 2019
6dda528
Update CHANGELOG.md
dimitri-yatsenko Oct 8, 2019
796dae6
Update CHANGELOG.md
dimitri-yatsenko Oct 8, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
rename attribute `basename` to `attachment_name` in external table
  • Loading branch information
dimitri-yatsenko committed Sep 20, 2019
commit a2d6f9a44b3d2f527c023ecdc99929a01a50d82c
31 changes: 17 additions & 14 deletions datajoint/external.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def definition(self):
hash : uuid # hash of contents (blob), of filename + contents (attach), or relative filepath (filepath)
---
size :bigint unsigned # size of object in bytes
basename=null : varchar(255) # the filename of an attachment
attachment_name=null : varchar(255) # the filename of an attachment
filepath=null : varchar(1000) # relative filepath or attachment filename
contents_hash=null : uuid # used for the filepath datatype
timestamp=CURRENT_TIMESTAMP :timestamp # automatic timestamp
Expand Down Expand Up @@ -179,26 +179,26 @@ def get(self, uuid):
# --- ATTACHMENTS ---

def upload_attachment(self, local_path):
basename = Path(local_path).name
uuid = uuid_from_file(local_path, init_string=basename + '\0')
external_path = self._make_uuid_path(uuid, '.' + basename)
attachment_name = Path(local_path).name
uuid = uuid_from_file(local_path, init_string=attachment_name + '\0')
external_path = self._make_uuid_path(uuid, '.' + attachment_name)
self._upload_file(local_path, external_path)
# insert tracking info
self.connection.query("""
INSERT INTO {tab} (hash, size, basename)
VALUES (%s, {size}, "{basename}")
INSERT INTO {tab} (hash, size, attachment_name)
VALUES (%s, {size}, "{attachment_name}")
ON DUPLICATE KEY UPDATE timestamp=CURRENT_TIMESTAMP""".format(
tab=self.full_table_name,
size=Path(local_path).stat().st_size,
basename=basename), args=[uuid.bytes])
attachment_name=attachment_name), args=[uuid.bytes])
return uuid

def get_attachment_basename(self, uuid):
return (self & {'hash': uuid}).fetch1('basename')
def get_attachment_name(self, uuid):
return (self & {'hash': uuid}).fetch1('attachment_name')

def download_attachment(self, uuid, basename, download_path):
def download_attachment(self, uuid, attachment_name, download_path):
""" save attachment from memory buffer into the save_path """
external_path = self._make_uuid_path(uuid, '.' + basename)
external_path = self._make_uuid_path(uuid, '.' + attachment_name)
self._download_file(external_path, download_path)

# --- FILEPATH ---
Expand Down Expand Up @@ -274,10 +274,10 @@ def fetch_external_paths(self, **fetch_kwargs):
"""
fetch_kwargs.update(as_dict=True)
paths = []
for item in self.fetch('hash', 'basename', 'filepath', **fetch_kwargs):
if item['basename']:
for item in self.fetch('hash', 'attachment_name', 'filepath', **fetch_kwargs):
if item['attachment_name']:
# attachments
path = self._make_uud_path(item['hash'], '.' + item['basename'])
path = self._make_uuid_path(item['hash'], '.' + item['attachment_name'])
elif item['filepath']:
# external filepaths
path = self._make_external_filepath(item['filepath'])
Expand Down Expand Up @@ -341,6 +341,9 @@ def __init__(self, schema):
self.schema = schema
self._tables = {}

def __repr__(self):
return "\n ".join(["External tables for schema {schema}:".format(schema=self.schema)] + list(self))

def __getitem__(self, store):
"""
Triggers the creation of an external table.
Expand Down
10 changes: 5 additions & 5 deletions datajoint/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,24 +59,24 @@ def _get(connection, attr, data, squeeze, download_path):
# 3. if exists and checksum passes then return the local filepath
# 4. Otherwise, download the remote file and return the new filepath
_uuid = uuid.UUID(bytes=data) if attr.is_external else None
basename = (extern.get_attachment_basename(_uuid) if attr.is_external
attachment_name = (extern.get_attachment_name(_uuid) if attr.is_external
else data.split(b"\0", 1)[0].decode())
local_filepath = Path(download_path) / basename
local_filepath = Path(download_path) / attachment_name
if local_filepath.is_file():
attachment_checksum = _uuid if attr.is_external else hash.uuid_from_buffer(data)
if attachment_checksum == hash.uuid_from_file(local_filepath, init_string=basename + '\0'):
if attachment_checksum == hash.uuid_from_file(local_filepath, init_string=attachment_name + '\0'):
return adapt(local_filepath) # checksum passed, no need to download again
# generate the next available alias filename
for n in itertools.count():
f = local_filepath.parent / (local_filepath.stem + '_%04x' % n + local_filepath.suffix)
if not f.is_file():
local_filepath = f
break
if attachment_checksum == hash.uuid_from_file(f, init_string=basename + '\0'):
if attachment_checksum == hash.uuid_from_file(f, init_string=attachment_name + '\0'):
return adapt(f) # checksum passed, no need to download again
# Save attachment
if attr.is_external:
extern.download_attachment(_uuid, basename, local_filepath)
extern.download_attachment(_uuid, attachment_name, local_filepath)
else:
# write from buffer
safe_write(local_filepath, data.split(b"\0", 1)[1])
Expand Down
4 changes: 2 additions & 2 deletions tests/test_blob_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,5 +203,5 @@ def test_query():
dj.config['cache'] = os.path.expanduser('~/temp/dj-cache')

test_mod = dj.create_virtual_module('test_mod', 'djtest_blob_migrate')
r = test_mod.A.fetch('blob_share')
assert_equal(test_mod.A.fetch('blob_share')[1][1], 2)
r = test_mod.A.fetch('blob_share', order_by='id')
assert_equal(r[1][1], 2)