Initial commit
This commit is contained in:
commit
8410389d6a
166
cleanmedia
Executable file
166
cleanmedia
Executable file
@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
#TODO: Sanity checks: Are files on the file system that the db does not know about?
|
||||
#TODO: Sanity checks: Are there thumbnails in the db that do not have corresponding media file entries?
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
import logging
|
||||
import typing
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
except ImportError as e:
|
||||
print("Please install psycopg2")
|
||||
exit(1)
|
||||
try:
|
||||
import yaml
|
||||
except ImportError as e:
|
||||
print("Please install pyyaml / python3-yaml")
|
||||
exit(1)
|
||||
|
||||
def read_config(conf_file):
    """Read the YAML config file and return the media settings.

    Args:
        conf_file: Path of the YAML configuration file to open.

    Returns:
        A ``(base_path, connection_string)`` tuple: the ``media_api``
        ``base_path`` as a ``pathlib.Path`` and the database connection
        string as found in the config.

    Raises:
        SystemExit: With status 1 (after logging an error) if the file does
            not exist, the ``media_api`` section is missing, no connection
            string is found, or ``base_path`` is missing.
    """
    try:
        with open(conf_file) as f:
            config = yaml.safe_load(f)
    except FileNotFoundError:
        errstr = f"Config file {conf_file} not found. Use the --help option to find out more."
        logging.error(errstr)
        raise SystemExit(1)

    # An empty YAML document loads as None, so guard the type before using "in".
    if not isinstance(config, dict) or "media_api" not in config:
        logging.error("Missing section media_api")
        raise SystemExit(1)

    # Sections that are present but empty load as None; treat them as empty.
    media_api = config["media_api"] or {}
    global_section = config.get("global") or {}

    if "database" in global_section:
        conn_str = (global_section["database"] or {}).get("connection_string", None)
    else:
        logging.debug("No database section, so we need the media_api specific connection string")
        # NOTE(review): this reads media_api.connection_string directly;
        # confirm whether the config nests it under media_api.database instead.
        conn_str = media_api.get("connection_string", None)

    if conn_str is None:
        logging.error("Did not find connection string to media database.")
        raise SystemExit(1)

    # Check for None *before* building the Path: Path(None) raises TypeError,
    # which would hide the intended error message.
    base_path = media_api.get("base_path", None)
    if base_path is None:
        logging.error("Missing base_path in media_api")
        raise SystemExit(1)

    return (Path(base_path), conn_str)
|
||||
|
||||
|
||||
|
||||
class File:
    """A media file recorded in the media repository database.

    Each file lives in its own directory below the repository's media path;
    that directory holds an entry named ``file`` (see ``exists``) and is
    removed as a whole by ``delete``.
    """

    def __init__(self, media_repo, media_id, creation_ts, base64hash: str):
        """Create a file record.

        Args:
            media_repo: The MediaRepository in which this file is recorded.
            media_id: Identifier of the media entry in the database.
            creation_ts: Creation time in seconds since the epoch.
            base64hash: Hash string from which the on-disk path is derived.
        """
        # The MediaRepository in which this file is recorded
        self.repo = media_repo
        self.media_id = media_id
        # creation_ts is seconds since the epoch
        self.create_date = datetime.fromtimestamp(creation_ts)
        self.base64hash = base64hash

    def fullpath(self) -> typing.Optional[Path]:
        """Return the directory holding the "file" and all thumbnails.

        Returns None if no usable hash is known. The path is
        ``<media_path>/<hash[0]>/<hash[1]>/<hash[2:]>``.
        """
        # TODO: Make a property, calculate on first usage and cache it?
        # A hash shorter than 3 characters has an empty last component, so the
        # path would collapse onto a shared parent directory; treat as unknown.
        if not self.base64hash or len(self.base64hash) < 3:
            return None
        return self.repo.media_path / self.base64hash[0:1] / self.base64hash[1:2] / self.base64hash[2:]

    def delete(self):
        """Delete the file's directory and everything in it.

        Does nothing (apart from logging) if no path is known or the directory
        is not present on disk. Database entries are not touched here.
        """
        directory = self.fullpath()
        if directory is None:
            logging.info(f"No known path for file id '{self.media_id}', cannot delete.")
            return
        if not directory.is_dir():
            # Nothing on disk; rmdir() would raise FileNotFoundError here.
            logging.info(f"Directory {directory} for file id '{self.media_id}' does not exist, nothing to delete.")
            return
        # Deepest entries first so nested directories are empty when removed.
        entries = sorted(directory.glob('**/*'), key=lambda p: len(p.parts), reverse=True)
        for entry in entries:
            if entry.is_dir() and not entry.is_symlink():
                entry.rmdir()
            else:
                entry.unlink()
        directory.rmdir()
        logging.debug(f"Deleted directory {directory}")

    def exists(self):
        """Return True if the media file itself exists on the file system."""
        path = self.fullpath()
        if path is None:
            return False
        return (path / 'file').exists()

    def has_thumbnail(self):
        """Return the number of thumbnail rows recorded for this media id.

        The result is an integer count, so it is falsy when there are none.
        """
        # mediaapi_thumbnail:
        # media_id | media_origin | content_type | file_size_bytes | creation_ts | width | height | resize_method
        # Pass media_id as a bound parameter instead of formatting it into the
        # SQL text; the with-block closes the cursor even if the query fails.
        with self.repo.conn.cursor() as cur:
            cur.execute(
                "select COUNT(media_id) from mediaapi_thumbnail WHERE media_id=%s;",
                (self.media_id,),
            )
            row = cur.fetchone()
        return row[0]
|
||||
|
||||
class MediaRepository:
    """A media directory on disk together with its PostgreSQL database."""

    # Connection string prefixes accepted by connect_db().
    _POSTGRES_SCHEMES = ("postgres://", "postgresql://")

    def __init__(self, media_path: Path, connection_string: str):
        """Validate the media directory and connect to the database.

        Args:
            media_path: Directory containing the media files (a pathlib.Path).
            connection_string: Database connection string.

        Raises:
            Exception: If ``media_path`` is not an existing directory.
            ValueError: If the connection string is not a postgres one.
        """
        # media_path is a pathlib.Path
        self.media_path = media_path
        if not self.media_path.is_absolute():
            logging.warning("The media path is relative, make sure you run this script in the correct directory!")
        if not self.media_path.is_dir():
            raise Exception("The configured media dir cannot be found!")

        # psql db connection
        self.conn = None
        self.db_conn_string = connection_string
        self.connect_db()

    def connect_db(self):
        """Open the database connection and store it in ``self.conn``.

        Raises:
            ValueError: If the connection string is missing or does not start
                with ``postgres://`` or ``postgresql://``.
        """
        # postgresql://user:pass@hostname/database?params
        # postgres://dendrite:dendrite@localhost/dendrite?
        if self.db_conn_string is None or not self.db_conn_string.startswith(self._POSTGRES_SCHEMES):
            errstr = "DB connection not a postgres one"
            logging.error(errstr)
            raise ValueError(errstr)
        self.conn = psycopg2.connect(self.db_conn_string)

    def get_remote_media(self):
        """Return a list of File objects for all rows with an empty user_id.

        The query selects ``media_id``, ``creation_ts`` and ``base64hash``
        from ``mediaapi_media_repository`` where ``user_id = ''``.
        """
        # media_id | media_origin | content_type | file_size_bytes | creation_ts | upload_name | base64hash | user_id
        # The with-block closes the cursor even if the query or fetch fails.
        with self.conn.cursor() as cur:
            cur.execute("select media_id, creation_ts, base64hash from mediaapi_media_repository WHERE user_id = '';")
            rows = cur.fetchall()
        # creation_ts is ms since the epoch, so we need to make sec out of it
        return [File(self, row[0], row[1] // 1000, row[2]) for row in rows]
|
||||
# mediaapi_thumbnail:
|
||||
# media_id | media_origin | content_type | file_size_bytes | creation_ts | width | height | resize_method
|
||||
|
||||
def parse_options(argv: typing.Optional[typing.Sequence[str]] = None):
    """Parse the command line and configure logging.

    Args:
        argv: Argument list to parse. When None (the default), argparse
            reads the arguments from ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with ``config``, ``dryrun`` and ``debug``.
    """
    parser = argparse.ArgumentParser(
        prog='cleanmedia',
        description='Deletes older remote media files from dendrite servers',
        epilog='Works only with postgres databases.',
    )
    parser.add_argument('-c', '--config', default="config.yaml", help="location of the dendrite.yaml config file.")
    parser.add_argument('-n', '--dryrun', action='store_true', help="Dry run (don't actually modify any files).")
    parser.add_argument('-d', '--debug', action='store_true', help="Turn debug output on.")
    args = parser.parse_args(argv)

    # INFO is the default level; --debug switches to DEBUG.
    loglevel = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=loglevel, format='%(levelname)s - %(message)s')
    return args
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: delete remote media created more than 30 days ago.
    args = parse_options()

    (MEDIA_PATH, CONN_STR) = read_config(args.config)
    mr = MediaRepository(MEDIA_PATH, CONN_STR)

    # Files created before this moment are candidates for deletion.
    cleantime = datetime.today() - timedelta(days=30)
    # has_thumbnail() runs a database query, so only call it when the
    # result will actually be logged.
    debug_enabled = logging.getLogger().isEnabledFor(logging.DEBUG)

    for file in mr.get_remote_media():
        if file.create_date >= cleantime:
            continue
        if debug_enabled:
            logging.debug(f"file id {file.media_id}: thumbnails={file.has_thumbnail()} hash={file.base64hash}")
        if not file.exists():
            logging.info(f"file id {file.media_id} does not physically exist (path {file.fullpath()})")
        if args.dryrun:
            # Report what a real run would remove instead of staying silent.
            logging.info(f"Dry run: would delete file id {file.media_id} (path {file.fullpath()})")
        else:
            file.delete()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user