Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
5918b2c
move dj.config into settings.py
dimitri-yatsenko Dec 2, 2018
bc85f21
Merge branch 'dev' into attachments
dimitri-yatsenko Dec 2, 2018
92553b0
Merge branch 'master' of https://github.com/datajoint/datajoint-pytho…
dimitri-yatsenko Dec 2, 2018
c127693
Merge branch 'dev' into attachments
dimitri-yatsenko Dec 3, 2018
1b188da
Merge branch 'dev' into attachments
dimitri-yatsenko Dec 3, 2018
13573c5
Merge branch 'dev' into attachments
dimitri-yatsenko Dec 3, 2018
f3dd5b3
add properties for heading attributes for supporting configurable blo…
dimitri-yatsenko Dec 3, 2018
046291a
rename property is_supported to unsupported for heading attributes
dimitri-yatsenko Dec 3, 2018
1f5fe9d
load configurable fields
dimitri-yatsenko Dec 3, 2018
3350516
implement declaration of configurable attributes: blob- and attach.
dimitri-yatsenko Dec 3, 2018
acc07fb
prepare for saving attachments
dimitri-yatsenko Dec 3, 2018
c11bbbf
add attach.py for saving and loading attachments
dimitri-yatsenko Dec 3, 2018
390b0b7
implement inserting attachments
dimitri-yatsenko Dec 3, 2018
2c04d74
implement fetch of attachments and configurable blobs
dimitri-yatsenko Dec 3, 2018
43e9c76
fix issue #467
dimitri-yatsenko Dec 3, 2018
51336ff
further cleanup of __init__.py
dimitri-yatsenko Dec 3, 2018
cee588e
Use DEFAULT instead of NULL when the insert value is None.
dimitri-yatsenko Dec 3, 2018
c04f974
slight refactor of Table.insert
dimitri-yatsenko Dec 3, 2018
9de4782
fix for error introduced in previous commit
dimitri-yatsenko Dec 3, 2018
0c35af1
Merge branch 'master' of https://github.com/datajoint/datajoint-pytho…
dimitri-yatsenko Dec 3, 2018
1588303
Merge branch 'master' of https://github.com/datajoint/datajoint-pytho…
dimitri-yatsenko Dec 4, 2018
f265674
implement external file folding
dimitri-yatsenko Dec 7, 2018
bcebce5
Merge branch 'master' into attachments
dimitri-yatsenko Dec 11, 2018
50f17ce
remove the `keys` property from `fetch` (a warning was displayed in s…
dimitri-yatsenko Dec 11, 2018
f141aa7
add `dj.get_schema_names()`
dimitri-yatsenko Dec 11, 2018
6310c7d
stylistic improvements
dimitri-yatsenko Dec 13, 2018
7cb1d3f
Merge branch 'master' of https://github.com/datajoint/datajoint-pytho…
dimitri-yatsenko Dec 13, 2018
aa72832
Merge branch 'master' into attachments
dimitri-yatsenko Dec 13, 2018
4818bbb
Merge branch 'master' into attachments
dimitri-yatsenko Dec 19, 2018
3aa936e
complete implementation of attachments and configurable blobs with pa…
dimitri-yatsenko Jan 14, 2019
bdf8195
Merge branch 'master' of https://github.com/datajoint/datajoint-pytho…
dimitri-yatsenko Jan 14, 2019
173bf1d
add test for configurable blobs
dimitri-yatsenko Jan 14, 2019
61c2ce7
drop support of Python 3.4
dimitri-yatsenko Jan 14, 2019
aa6a2ce
add test for attachment methods
dimitri-yatsenko Jan 14, 2019
f49cf22
fix test_attach
dimitri-yatsenko Jan 15, 2019
eea3e20
fix 3.4 compatibility
dimitri-yatsenko Jan 15, 2019
7ee6134
Python 3.4 compatibility
dimitri-yatsenko Jan 15, 2019
6701abb
fix Python 3.4 compatibility
dimitri-yatsenko Jan 15, 2019
346f47f
fix Python 3.4 compatibility
dimitri-yatsenko Jan 15, 2019
b2087aa
fix Python 3.4 compatibility
dimitri-yatsenko Jan 15, 2019
332cfd6
Merge branch 'master' of https://github.com/datajoint/datajoint-pytho…
dimitri-yatsenko Jan 15, 2019
0c491e2
improve error message
dimitri-yatsenko Jan 15, 2019
7e51e4f
improve error message
dimitri-yatsenko Jan 15, 2019
0434fc8
Merge branch 'attachments' of https://github.com/dimitri-yatsenko/dat…
dimitri-yatsenko Jan 16, 2019
484e926
bugfix in S3 store
dimitri-yatsenko Jan 16, 2019
1a83fe6
bugfix in S3 store
dimitri-yatsenko Jan 16, 2019
4c8b6eb
Merge branch 'attachments' of https://github.com/dimitri-yatsenko/dat…
dimitri-yatsenko Jan 16, 2019
bf66d64
Merge branch 'master' of https://github.com/datajoint/datajoint-pytho…
dimitri-yatsenko Jan 22, 2019
8f4e8f9
Merge branch 'master' into attachments
dimitri-yatsenko Feb 4, 2019
0826d94
implement external storage cleanup with subfolding
dimitri-yatsenko Feb 6, 2019
fb14029
fix error message and release date
dimitri-yatsenko Feb 7, 2019
90cf697
improve warning messages
dimitri-yatsenko Feb 8, 2019
afeadb1
change version to 0.12.dev
dimitri-yatsenko Feb 8, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
implement external storage cleanup with subfolding
  • Loading branch information
dimitri-yatsenko committed Feb 6, 2019
commit 0826d94b866d971dde81e3a856afce20399a9360
4 changes: 2 additions & 2 deletions datajoint/erd.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ class ERD:
"""

def __init__(self, *args, **kwargs):
warnings.warn('ERD functionality depends on matplotlib and pygraphviz. Please install both of these '
'libraries to enable the ERD feature.')
warnings.warn('ERD functionality depends on matplotlib, networkx, and pygraphviz. '
'Please install both of these libraries to enable the ERD feature.')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are now three libraries mentioned.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

else:
class ERD(nx.DiGraph):
"""
Expand Down
29 changes: 21 additions & 8 deletions datajoint/external.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from tqdm import tqdm
import itertools
from .settings import config
from .errors import DataJointError
from .hash import long_hash
Expand Down Expand Up @@ -165,22 +165,35 @@ def delete_garbage(self):
for ref in self.references) or "TRUE")
print('Deleted %d items' % self.connection.query("SELECT ROW_COUNT()").fetchone()[0])

def clean_store(self, store, display_progress=True):
def clean_store(self, store, verbose=True):
"""
Clean unused data in an external storage repository from unused blobs.
This must be performed after delete_garbage during low-usage periods to reduce risks of data loss.
"""
spec = config.get_store_spec(store)
progress = tqdm if display_progress else lambda x: x
in_use = set(self.fetch('hash'))
in_use = set(x for x in (self & '`hash` LIKE "%%{store}"'.format(store=store)).fetch('hash'))
if spec['protocol'] == 'file':
for folder, _, files in progress(os.walk(os.path.join(spec['location'], self.database))):
for f in files:
if f not in in_use:
count = itertools.count()
print('Deleting...')
deleted_folders = set()
for folder, dirs, files in os.walk(os.path.join(spec['location'], self.database), topdown=False):
if dirs and files:
raise DataJointError('Invalid repository with files in non-terminal folder %s' % folder)
dirs = set(d for d in dirs if os.path.join(folder, d) not in deleted_folders)
if not dirs:
files_not_in_use = [f for f in files if f not in in_use]
for f in files_not_in_use:
filename = os.path.join(folder, f)
next(count)
if verbose:
print(filename)
os.remove(filename)
if len(files_not_in_use) == len(files):
os.rmdir(folder)
deleted_folders.add(folder)
print('Deleted %d objects' % next(count))
elif spec['protocol'] == 's3':
try:
s3.Folder(database=self.database, **spec).clean(in_use)
failed_deletes = s3.Folder(database=self.database, **spec).clean(in_use, verbose=verbose)
except TypeError:
raise DataJointError('External store {store} configuration is incomplete.'.format(store=store))
31 changes: 26 additions & 5 deletions datajoint/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,34 @@ def get(self, blob_hash):
except minio.error.NoSuchKey:
return None

def clean(self, exclude, max_count=None):
def clean(self, exclude, max_count=None, verbose=False):
"""
Delete all objects except for those in the exclude
:param exclude: a list of blob_hashes to skip.
:param max_count: maximum number of object to delete
:return: generator of objects that failed to delete
:param verbose: If True, print deleted objects
:return: list of objects that failed to delete
"""
return self.client.remove_objects(self.bucket, itertools.islice(
(x.object_name for x in self.client.list_objects(self.bucket, self.remote_path + '/')
if x not in exclude), max_count))
count = itertools.count()
if verbose:
def out(name):
next(count)
print(name)
return name
else:
def out(name):
next(count)
return name

if verbose:
print('Deleting...')

names = (out(x.object_name)
for x in self.client.list_objects(self.bucket, self.remote_path + '/', recursive=True)
if x.object_name.split('/')[-1] not in exclude)

failed_deletes = list(
self.client.remove_objects(self.bucket, itertools.islice(names, max_count)))

print('Deleted: %i S3 objects' % next(count))
return failed_deletes
4 changes: 3 additions & 1 deletion datajoint/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def jobs(self):
return self._jobs

@property
def external_table(self):
def external(self):
"""
schema.external provides a view of the external hash table for the schema
:return: external table
Expand All @@ -237,6 +237,8 @@ def external_table(self):
self._external = ExternalTable(self.connection, self.database)
return self._external

external_table = external # for backward compatibility to pre-0.12.0


def create_virtual_module(module_name, schema_name, create_schema=False, create_tables=False, connection=None):
"""
Expand Down
1 change: 1 addition & 0 deletions tests/test_erd.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def test_dependencies():

@staticmethod
def test_erd():
assert_true(dj.erd.erd_active, 'Failed to import networkx and pydot')
erd = dj.ERD(schema, context=namespace)
graph = erd._make_graph()
assert_true(set(cls.__name__ for cls in (A, B, D, E, L)).issubset(graph.nodes()))
Expand Down