-
Notifications
You must be signed in to change notification settings - Fork 405
Expand file tree
/
Copy pathCleanFiles.py
More file actions
executable file
·88 lines (73 loc) · 3.14 KB
/
CleanFiles.py
File metadata and controls
executable file
·88 lines (73 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python3
# Contest Management System - http://cms-dev.github.io/
# Copyright © 2016 Luca Versari <veluca93@gmail.com>
# Copyright © 2016 Stefano Maggiolo <s.maggiolo@gmail.com>
# Copyright © 2018 Luca Wehrstedt <luca.wehrstedt@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""This script scans the whole database for file object references
and removes unreferenced file objects from the file store. If required,
it also replaces all the executable digests in the database with a
tombstone digest, to make executables removable in the clean pass.
"""
import argparse
import logging
import sys
from cms.db import SessionGen, Session, Digest, Executable, enumerate_files
from cms.db.filecacher import FileCacher
# No name is passed to getLogger(), so this is the root logger.
logger = logging.getLogger()
def make_tombstone(session: Session):
    """Point every executable in the database at the tombstone digest.

    All Executable rows are loaded through the given session and their
    digest is overwritten with Digest.TOMBSTONE. The number of rows that
    did not already carry the tombstone is logged. Nothing is committed
    here: that is left to the caller.

    session (Session): the database session to operate on.

    """
    executables = session.query(Executable).all()

    # Count first, while the old digests are still in place.
    replaced = sum(
        1 for executable in executables
        if executable.digest != Digest.TOMBSTONE)

    # The assignment is unconditional, exactly like the counting is not.
    for executable in executables:
        executable.digest = Digest.TOMBSTONE

    logger.info("Replaced %d executables with the tombstone.", replaced)
def clean_files(session: Session, dry_run: bool):
    """Remove from the file store the files the database does not reference.

    The digests listed by the file store are compared with the set
    returned by enumerate_files(session); whatever is only in the file
    store is treated as an orphan. The number of orphans and their
    cumulative size are logged, and, unless dry_run is set, the orphans
    are deleted from the file store.

    session (Session): the database session used to collect the
        referenced digests.
    dry_run (bool): if True, only log what would be removed and delete
        nothing.

    """
    filecacher = FileCacher()
    # Only the first element of each listed entry is used; it is the
    # value compared against the digests found in the database.
    files = {entry[0] for entry in filecacher.list()}
    logger.info("A total number of %d files are present in the file store",
                len(files))

    found_digests = enumerate_files(session)
    logger.info("Found %d digests while scanning", len(found_digests))

    files -= found_digests
    logger.info("%d digests are orphan.", len(files))

    total_size = sum(filecacher.get_size(orphan) for orphan in files)
    logger.info("Orphan files take %s bytes of disk space",
                "{:,}".format(total_size))

    if dry_run:
        return

    # Count from 1 so that the progress message reports the number of
    # files actually deleted so far (counting from 0 would log "0 files
    # deleted" right after the first deletion and stay one behind).
    for count, orphan in enumerate(files, 1):
        filecacher.delete(orphan)
        if count % 100 == 0:
            logger.info("%d files deleted from the file store", count)
    logger.info("All orphan files have been deleted")
def main():
    """Parse the command line and run the cleanup.

    With -t/--tombstone all executable digests are first replaced with
    the tombstone; then the file store is cleaned. With -n/--dry-run
    nothing is committed to the database and no file is deleted.

    return (int): the process exit code, always 0 when no exception is
        raised.

    """
    parser = argparse.ArgumentParser(
        description="Remove unused file objects from the database. "
        "If -t is specified, also replace all executables with the tombstone")
    parser.add_argument(
        "-t", "--tombstone", action="store_true",
        help="replace all executable digests with the tombstone "
        "before cleaning")
    parser.add_argument(
        "-n", "--dry-run", action="store_true",
        help="only report the orphan files: delete nothing and do not "
        "commit any change to the database")
    args = parser.parse_args()

    with SessionGen() as session:
        if args.tombstone:
            make_tombstone(session)
        # Commit *before* touching the file store: the files of the
        # tombstoned executables must not be deleted while the database
        # could still end up referencing them (e.g. if a deletion or a
        # later commit failed). In a dry run the pending changes are
        # deliberately left uncommitted, but clean_files() still goes
        # through this same session to compute its report.
        if not args.dry_run:
            session.commit()
        clean_files(session, args.dry_run)

    return 0
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes
    # the process exit status.
    exit_code = main()
    sys.exit(exit_code)