summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x src/subscleaner/subscleaner.py 80
1 files changed, 39 insertions, 41 deletions
diff --git a/src/subscleaner/subscleaner.py b/src/subscleaner/subscleaner.py
index 0896d6c..504d43f 100755
--- a/src/subscleaner/subscleaner.py
+++ b/src/subscleaner/subscleaner.py
@@ -27,46 +27,44 @@ import chardet
import pysrt
ADS = [
- r".*nordvpn.*",
- r".*a Card Shark AMERICASCARDROOM.*",
- r".*OpenSubtitles.*",
- r".*Advertise your product or brand here.*",
- r".*Apóyanos y conviértete en miembro VIP Para.*",
- r".*Addic7ed.*",
- r".*argenteam.*",
- r".*AllSubs.*",
- r"Created and Encoded by.*",
- r".*corrected.*by.*",
- r".*Entre a AmericasCardroom. com Hoy.*",
- r".*Everyone is intimidated by a shark. Become.*",
- r".*Juegue Poker en Línea por Dinero Real.*",
- r".*OpenSubtitles.*",
- r".*Open Subtitles.*",
- r".*MKV Player.*",
- r".*MKV player.*",
- r".*Resync.*for.*",
- r".*Resync.*improved.*",
- r".*Ripped?By.*",
- r'.*Sigue "Community" en.*',
- r".*Subtitles.*by.*",
- r".*Subt?tulos.*por.*",
- r".*Support us and become VIP member.*",
- r".*Subs.*Team.*",
- r".*subscene.*",
- r".*Subtitulado por.*",
- r".*subtitulamos.*",
- r".*Synchronized.*by.*",
- r".*Sincronizado y corregido por.*",
- r".*subdivx.*",
- r".*Sync.*Corrected.*",
- r".*Sync.*corrections.*by.*",
- r".*sync and corrections by.*" r".*Sync.*by.*",
- r".*Una.*traducci?n.*de.*",
- r".*tvsubtitles.*",
- r".*Una.*traducci?n.*de.*",
- "Tacho8",
- r".*www. com.*",
- r".*www. es.*",
+ re.compile(r"\bnordvpn\b", re.IGNORECASE),
+ re.compile(r"\ba Card Shark AMERICASCARDROOM\b", re.IGNORECASE),
+ re.compile(r"\bOpenSubtitles\b", re.IGNORECASE),
+ re.compile(r"\bAdvertise your product or brand here\b", re.IGNORECASE),
+ re.compile(r"\bApóyanos y conviértete en miembro VIP Para\b", re.IGNORECASE),
+ re.compile(r"\bAddic7ed\b", re.IGNORECASE),
+ re.compile(r"\bargenteam\b", re.IGNORECASE),
+ re.compile(r"\bAllSubs\b", re.IGNORECASE),
+ re.compile(r"\bCreated and Encoded by\b", re.IGNORECASE),
+ re.compile(r"\bcorrected\s+by\b", re.IGNORECASE),
+ re.compile(r"\bEntre a AmericasCardroom\.\s*com Hoy\b", re.IGNORECASE),  # earlier pattern targeted "AmericasCardroom. com" (dot then space); \s* accepts both spellings
+ re.compile(r"\bEveryone is intimidated by a shark\. Become\b", re.IGNORECASE),
+ re.compile(r"\bJuegue Poker en Línea por Dinero Real\b", re.IGNORECASE),
+ re.compile(r"\bOpen Subtitles\b", re.IGNORECASE),
+ re.compile(r"\bMKV Player\b", re.IGNORECASE),
+ re.compile(r"\bResync\s+for\b", re.IGNORECASE),
+ re.compile(r"\bResync\s+improved\b", re.IGNORECASE),
+ re.compile(r"\bRipped\s+By\b", re.IGNORECASE),
+ re.compile(r'\bSigue "Community" en\b', re.IGNORECASE),
+ re.compile(r"\bSubtitles\s+by\b", re.IGNORECASE),
+ re.compile(r"\bSubt[íi]tulos\s+por\b", re.IGNORECASE),
+ re.compile(r"\bSupport us and become VIP member\b", re.IGNORECASE),
+ re.compile(r"\bSubs\s+Team\b", re.IGNORECASE),
+ re.compile(r"\bsubscene\b", re.IGNORECASE),
+ re.compile(r"\bSubtitulado por\b", re.IGNORECASE),
+ re.compile(r"\bsubtitulamos\b", re.IGNORECASE),
+ re.compile(r"\bSynchronized\s+by\b", re.IGNORECASE),
+ re.compile(r"\bSincronizado y corregido por\b", re.IGNORECASE),
+ re.compile(r"\bsubdivx\b", re.IGNORECASE),
+ re.compile(r"\bSync\s+Corrected\b", re.IGNORECASE),
+ re.compile(r"\bSync\s+corrections\s+by\b", re.IGNORECASE),
+ re.compile(r"\bsync and corrections by\b", re.IGNORECASE),
+ re.compile(r"\bSync\s+by\b", re.IGNORECASE),
+ re.compile(r"\bUna\s+traducci[óo]n\s+de\b", re.IGNORECASE),
+ re.compile(r"\btvsubtitles\b", re.IGNORECASE),
+ re.compile(r"\bTacho8\b", re.IGNORECASE),
+ re.compile(r"\bwww\.\S+\.com\b", re.IGNORECASE),
+ re.compile(r"\bwww\.\S+\.es\b", re.IGNORECASE),
]
@@ -80,7 +78,7 @@ def ads_in_line(line: str) -> bool:
Returns:
bool: True if the line contains an ad, False otherwise.
"""
- return any(re.match(ad, line, re.DOTALL) for ad in ADS)
+ return any(ad.search(line) for ad in ADS)
def is_already_processed(filename: str) -> bool: