#!/usr/bin/python3
# Provenance: src/subscleaner/subscleaner.py as added by commit
# 5387c348a6a8401bad2f717f4d464f1549d5b405 ("Added subscleaner script").
"""Main Subscleaner module."""

"""
Subscleaner.
Copyright (C) 2023 Roger Gonzalez

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""

import os
import re
import sys
import time

import chardet
import pysrt

# Regular expressions describing advertisement / credit lines.
# Each pattern is applied with ``re.match`` (anchored at the start of the
# subtitle text) and ``re.DOTALL``; patterns without a leading ``.*`` therefore
# only match when the text *starts* with them.
ADS = [
    r".*nordvpn.*",
    r".*a Card Shark AMERICASCARDROOM.*",
    r".*OpenSubtitles.*",
    r".*Advertise your product or brand here.*",
    r".*Apóyanos y conviértete en miembro VIP Para.*",
    r".*Addic7ed.*",
    r".*argenteam.*",
    r".*AllSubs.*",
    r"Created and Encoded by.*",
    r".*corrected.*by.*",
    r".*Entre a AmericasCardroom. com Hoy.*",
    r".*Everyone is intimidated by a shark. Become.*",
    r".*Juegue Poker en Línea por Dinero Real.*",
    r".*Open Subtitles.*",
    r".*MKV Player.*",
    r".*MKV player.*",
    r".*Resync.*for.*",
    r".*Resync.*improved.*",
    r".*Ripped?By.*",
    r'.*Sigue "Community" en.*',
    r".*Subtitles.*by.*",
    r".*Subt?tulos.*por.*",
    r".*Support us and become VIP member.*",
    r".*Subs.*Team.*",
    r".*subscene.*",
    r".*Subtitulado por.*",
    r".*subtitulamos.*",
    r".*Synchronized.*by.*",
    r".*Sincronizado y corregido por.*",
    r".*subdivx.*",
    r".*Sync.*Corrected.*",
    r".*Sync.*corrections.*by.*",
    # These two entries used to be fused into a single pattern by a missing
    # comma (implicit string-literal concatenation), which disabled the second.
    r".*sync and corrections by.*",
    r".*Sync.*by.*",
    r".*Una.*traducci?n.*de.*",
    r".*tvsubtitles.*",
    "Tacho8",
    r".*www. com.*",
    r".*www. es.*",
]

# Files whose ctime is older than this local-time instant are treated as
# already processed and are skipped by ``process_file``.
_ALREADY_PROCESSED_CUTOFF = time.mktime(
    time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"),
)


def ads_in_line(line: str) -> bool:
    """
    Check if the given line contains an ad.

    Args:
        line (str): The line of text to be checked.

    Returns:
        bool: True if the line matches any pattern in ``ADS``, False otherwise.
    """
    return any(re.match(ad, line, re.DOTALL) for ad in ADS)


def process_file(filename: str) -> bool:
    """
    Process a subtitle file to remove ads.

    The file is skipped when its ctime is older than the hard-coded cut-off
    (2021-05-13 00:00:00 local time). Otherwise its encoding is detected with
    chardet, it is parsed with pysrt, every subtitle whose text matches an ad
    pattern is removed, and the file is saved in place if anything changed.

    Any exception raised while handling the file is reported on stdout and
    swallowed, so that one bad file does not abort a whole batch.

    Args:
        filename (str): The path to the subtitle file.

    Returns:
        bool: True if the file was modified, False otherwise (including when
        the file was skipped or an error occurred).
    """
    try:
        if os.path.getctime(filename) < _ALREADY_PROCESSED_CUTOFF:
            print(f"Already processed {filename}")
            return False

        print(f"Analyzing: {filename}")

        with open(filename, "rb") as f:
            encoding = chardet.detect(f.read())["encoding"]

        subs = pysrt.open(filename, encoding=encoding)

        # Find every ad first; deleting while iterating shifts the remaining
        # items left and makes the loop skip the entry after each removal.
        ad_indexes = [i for i, line in enumerate(subs) if ads_in_line(line.text)]
        if not ad_indexes:
            return False

        for i in ad_indexes:
            print(f"Removing: {subs[i]}\n")

        # Delete from the end so the earlier indexes stay valid.
        for i in reversed(ad_indexes):
            del subs[i]

        print(f"Saving {filename}")
        subs.save(filename)
        return True
    except Exception as e:
        print(f"Error processing {filename}: {e}")
        return False


def main() -> None:
    """
    Process subtitle files to remove ads.

    Read filenames from standard input (one per line, surrounding whitespace
    stripped, blank lines ignored), process each file to remove ads, and
    print the result. Keep track of the modified files and print a summary
    at the end.
    """
    modified_files = []
    print("Starting script")
    for raw_line in sys.stdin:
        filename = raw_line.strip()
        if not filename:
            # A blank line is not a path; skip it instead of reporting an error.
            continue
        if process_file(filename):
            modified_files.append(filename)

    if modified_files:
        print(f"Modified {len(modified_files)} files")
    print("Done")


if __name__ == "__main__":
    main()