diff options
Diffstat (limited to 'src/subscleaner/subscleaner.py')
-rwxr-xr-x | src/subscleaner/subscleaner.py | 80 |
1 file changed, 39 insertions, 41 deletions
diff --git a/src/subscleaner/subscleaner.py b/src/subscleaner/subscleaner.py
index 0896d6c..504d43f 100755
--- a/src/subscleaner/subscleaner.py
+++ b/src/subscleaner/subscleaner.py
@@ -27,46 +27,44 @@ import chardet
 import pysrt
 
 ADS = [
-    r".*nordvpn.*",
-    r".*a Card Shark AMERICASCARDROOM.*",
-    r".*OpenSubtitles.*",
-    r".*Advertise your product or brand here.*",
-    r".*Apóyanos y conviértete en miembro VIP Para.*",
-    r".*Addic7ed.*",
-    r".*argenteam.*",
-    r".*AllSubs.*",
-    r"Created and Encoded by.*",
-    r".*corrected.*by.*",
-    r".*Entre a AmericasCardroom. com Hoy.*",
-    r".*Everyone is intimidated by a shark. Become.*",
-    r".*Juegue Poker en Línea por Dinero Real.*",
-    r".*OpenSubtitles.*",
-    r".*Open Subtitles.*",
-    r".*MKV Player.*",
-    r".*MKV player.*",
-    r".*Resync.*for.*",
-    r".*Resync.*improved.*",
-    r".*Ripped?By.*",
-    r'.*Sigue "Community" en.*',
-    r".*Subtitles.*by.*",
-    r".*Subt?tulos.*por.*",
-    r".*Support us and become VIP member.*",
-    r".*Subs.*Team.*",
-    r".*subscene.*",
-    r".*Subtitulado por.*",
-    r".*subtitulamos.*",
-    r".*Synchronized.*by.*",
-    r".*Sincronizado y corregido por.*",
-    r".*subdivx.*",
-    r".*Sync.*Corrected.*",
-    r".*Sync.*corrections.*by.*",
-    r".*sync and corrections by.*" r".*Sync.*by.*",
-    r".*Una.*traducci?n.*de.*",
-    r".*tvsubtitles.*",
-    r".*Una.*traducci?n.*de.*",
-    "Tacho8",
-    r".*www. com.*",
-    r".*www. es.*",
+    re.compile(r"\bnordvpn\b", re.IGNORECASE),
+    re.compile(r"\ba Card Shark AMERICASCARDROOM\b", re.IGNORECASE),
+    re.compile(r"\bOpenSubtitles\b", re.IGNORECASE),
+    re.compile(r"\bAdvertise your product or brand here\b", re.IGNORECASE),
+    re.compile(r"\bApóyanos y conviértete en miembro VIP Para\b", re.IGNORECASE),
+    re.compile(r"\bAddic7ed\b", re.IGNORECASE),
+    re.compile(r"\bargenteam\b", re.IGNORECASE),
+    re.compile(r"\bAllSubs\b", re.IGNORECASE),
+    re.compile(r"\bCreated and Encoded by\b", re.IGNORECASE),
+    re.compile(r"\bcorrected\s+by\b", re.IGNORECASE),
+    re.compile(r"\bEntre a AmericasCardroom\.com Hoy\b", re.IGNORECASE),
+    re.compile(r"\bEveryone is intimidated by a shark\. Become\b", re.IGNORECASE),
+    re.compile(r"\bJuegue Poker en Línea por Dinero Real\b", re.IGNORECASE),
+    re.compile(r"\bOpen Subtitles\b", re.IGNORECASE),
+    re.compile(r"\bMKV Player\b", re.IGNORECASE),
+    re.compile(r"\bResync\s+for\b", re.IGNORECASE),
+    re.compile(r"\bResync\s+improved\b", re.IGNORECASE),
+    re.compile(r"\bRipped\s+By\b", re.IGNORECASE),
+    re.compile(r'\bSigue "Community" en\b', re.IGNORECASE),
+    re.compile(r"\bSubtitles\s+by\b", re.IGNORECASE),
+    re.compile(r"\bSubt[íi]tulos\s+por\b", re.IGNORECASE),
+    re.compile(r"\bSupport us and become VIP member\b", re.IGNORECASE),
+    re.compile(r"\bSubs\s+Team\b", re.IGNORECASE),
+    re.compile(r"\bsubscene\b", re.IGNORECASE),
+    re.compile(r"\bSubtitulado por\b", re.IGNORECASE),
+    re.compile(r"\bsubtitulamos\b", re.IGNORECASE),
+    re.compile(r"\bSynchronized\s+by\b", re.IGNORECASE),
+    re.compile(r"\bSincronizado y corregido por\b", re.IGNORECASE),
+    re.compile(r"\bsubdivx\b", re.IGNORECASE),
+    re.compile(r"\bSync\s+Corrected\b", re.IGNORECASE),
+    re.compile(r"\bSync\s+corrections\s+by\b", re.IGNORECASE),
+    re.compile(r"\bsync and corrections by\b", re.IGNORECASE),
+    re.compile(r"\bSync\s+by\b", re.IGNORECASE),
+    re.compile(r"\bUna\s+traducci[óo]n\s+de\b", re.IGNORECASE),
+    re.compile(r"\btvsubtitles\b", re.IGNORECASE),
+    re.compile(r"\bTacho8\b", re.IGNORECASE),
+    re.compile(r"\bwww\.\S+\.com\b", re.IGNORECASE),
+    re.compile(r"\bwww\.\S+\.es\b", re.IGNORECASE),
 ]
 
 
@@ -80,7 +78,7 @@ def ads_in_line(line: str) -> bool:
     Returns:
         bool: True if the line contains an ad, False otherwise.
     """
-    return any(re.match(ad, line, re.DOTALL) for ad in ADS)
+    return any(ad.search(line) for ad in ADS)
 
 
 def is_already_processed(filename: str) -> bool: