cleanmedia: Allow deleting local media files as well
With the --local option we can also delete local media. We check which media files are currently used as avatar images and never purge those. "Local media" here means media that has been uploaded by users of our own homeserver. Fixes: #1
This commit is contained in:
parent
1cb7dad3c2
commit
055d9b4202
40
cleanmedia
40
cleanmedia
@ -96,6 +96,8 @@ class MediaRepository:
|
|||||||
logging.warn("The media path is relative, make sure you run this script in the correct directory!")
|
logging.warn("The media path is relative, make sure you run this script in the correct directory!")
|
||||||
if not self.media_path.is_dir():
|
if not self.media_path.is_dir():
|
||||||
raise Exception("The configured media dir cannot be found!")
|
raise Exception("The configured media dir cannot be found!")
|
||||||
|
# List of current avatar imgs. init empty
|
||||||
|
self._avatar_media_ids: List[str] = []
|
||||||
|
|
||||||
self.db_conn_string = connection_string # psql db connection
|
self.db_conn_string = connection_string # psql db connection
|
||||||
self.conn = self.connect_db()
|
self.conn = self.connect_db()
|
||||||
@ -110,10 +112,16 @@ class MediaRepository:
|
|||||||
raise ValueError(errstr)
|
raise ValueError(errstr)
|
||||||
return psycopg2.connect(self.db_conn_string)
|
return psycopg2.connect(self.db_conn_string)
|
||||||
|
|
||||||
def get_remote_media(self) -> List[File]:
|
def get_media(self, local: bool = False) -> List[File]:
|
||||||
|
"""Return List[File] of remote media or ALL media if local==True"""
|
||||||
with self.conn.cursor() as cur:
|
with self.conn.cursor() as cur:
|
||||||
# media_id | media_origin | content_type | file_size_bytes | creation_ts | upload_name | base64hash | user_id
|
# media_id | media_origin | content_type | file_size_bytes | creation_ts | upload_name | base64hash | user_id
|
||||||
cur.execute("select media_id, creation_ts, base64hash from mediaapi_media_repository WHERE user_id = '';")
|
sql_str = "SELECT media_id, creation_ts, base64hash from mediaapi_media_repository"
|
||||||
|
if not local:
|
||||||
|
# only fetch remote media where user_id is empty
|
||||||
|
sql_str += " WHERE user_id = ''"
|
||||||
|
sql_str += ";"
|
||||||
|
cur.execute(sql_str)
|
||||||
files = []
|
files = []
|
||||||
for row in cur.fetchall():
|
for row in cur.fetchall():
|
||||||
# creation_ts is ms since the epoch, so convert to seconds
|
# creation_ts is ms since the epoch, so convert to seconds
|
||||||
@ -121,6 +129,23 @@ class MediaRepository:
|
|||||||
files.append(f)
|
files.append(f)
|
||||||
return files
|
return files
|
||||||
|
|
||||||
|
def get_avatar_images(self) -> List[str]:
    """Return the media IDs that are currently used as avatar images.

    Reads every ``avatar_url`` from ``userapi_profiles`` and keeps the part
    after the last ``/`` (``mxc://matrix.org/6e627f4c538563`` yields
    ``6e627f4c538563``). The result is also stored in
    ``self._avatar_media_ids`` so that those media can be excluded from
    purging.

    Profiles without an avatar (NULL or empty ``avatar_url``) are skipped
    silently; a non-empty URL that yields no media ID is logged and skipped.

    Returns:
        The list of avatar media IDs, in query order.
    """
    media_ids: List[str] = []
    with self.conn.cursor() as cur:
        cur.execute("select avatar_url from userapi_profiles;")
        for row in cur.fetchall():
            url = row[0]  # e.g. mxc://matrix.org/6e627f4c538563
            if not url:
                # No avatar set for this profile; nothing to protect.
                continue
            _, slash, media_id = url.rpartition("/")
            if not slash or not media_id:
                logging.warning("No media id found in avatar URL '%s'!", url)
                continue
            media_ids.append(media_id)
    self._avatar_media_ids = media_ids
    return self._avatar_media_ids
|
||||||
|
|
||||||
def sanity_check_thumbnails(self) -> None:
|
def sanity_check_thumbnails(self) -> None:
|
||||||
"""Warn if we have thumbnails in the db that do not refer to existing media"""
|
"""Warn if we have thumbnails in the db that do not refer to existing media"""
|
||||||
with self.conn.cursor() as cur:
|
with self.conn.cursor() as cur:
|
||||||
@ -172,6 +197,8 @@ def parse_options() -> argparse.Namespace:
|
|||||||
parser.add_argument('-t', '--days', dest="days",
|
parser.add_argument('-t', '--days', dest="days",
|
||||||
default="30", type=int,
|
default="30", type=int,
|
||||||
help="Keep remote media for <DAYS> days.")
|
help="Keep remote media for <DAYS> days.")
|
||||||
|
parser.add_argument('-l', '--local', action='store_true',
|
||||||
|
help="Also include local (ie, from *our* users) media files when purging.")
|
||||||
parser.add_argument('-n', '--dryrun', action='store_true',
|
parser.add_argument('-n', '--dryrun', action='store_true',
|
||||||
help="Dry run (don't actually modify any files).")
|
help="Dry run (don't actually modify any files).")
|
||||||
parser.add_argument('-d', '--debug', action='store_true', help="Turn debug output on.")
|
parser.add_argument('-d', '--debug', action='store_true', help="Turn debug output on.")
|
||||||
@ -186,13 +213,18 @@ if __name__ == '__main__':
|
|||||||
args = parse_options()
|
args = parse_options()
|
||||||
(MEDIA_PATH, CONN_STR) = read_config(args.config)
|
(MEDIA_PATH, CONN_STR) = read_config(args.config)
|
||||||
mr = MediaRepository(MEDIA_PATH, CONN_STR)
|
mr = MediaRepository(MEDIA_PATH, CONN_STR)
|
||||||
|
# Sanity checks
|
||||||
mr.sanity_check_thumbnails() # warn in case of superfluous thumbnails
|
mr.sanity_check_thumbnails() # warn in case of superfluous thumbnails
|
||||||
|
# Preps
|
||||||
|
if args.local:
|
||||||
|
# populate the cache of current avt img. so we don't delete them
|
||||||
|
mr.get_avatar_images()
|
||||||
# ------real main part------------
|
# ------real main part------------
|
||||||
cleantime = datetime.today() - timedelta(days=args.days)
|
cleantime = datetime.today() - timedelta(days=args.days)
|
||||||
logging.info("Deleting remote media older than %s", cleantime)
|
logging.info("Deleting remote media older than %s", cleantime)
|
||||||
num_deleted = 0
|
num_deleted = 0
|
||||||
files = mr.get_remote_media()
|
files = mr.get_media(args.local)
|
||||||
for file in files:
|
for file in [f for f in files if f.media_id not in mr._avatar_media_ids]:
|
||||||
if file.create_date < cleantime:
|
if file.create_date < cleantime:
|
||||||
num_deleted += 1
|
num_deleted += 1
|
||||||
if args.dryrun: # the great pretender
|
if args.dryrun: # the great pretender
|
||||||
|
Loading…
x
Reference in New Issue
Block a user