From 5f63efa9e9225dbd7513863709225d362f062c45 Mon Sep 17 00:00:00 2001 From: yomguy Date: Sun, 31 Oct 2010 01:20:17 +0000 Subject: [PATCH] fix various bug, add blacklist git-svn-id: http://svn.parisson.org/svn/tools/trunk@122 457c0346-1240-4656-8a5a-9edca8063506 --- various/deegger.py | 72 +++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/various/deegger.py b/various/deegger.py index a214fd7..3137d0c 100755 --- a/various/deegger.py +++ b/various/deegger.py @@ -70,6 +70,25 @@ def prog_info(): """ % version +BLACKLIST = ['http://www.mobzy.us/',] + + +class Logger: + """A logging object""" + + def __init__(self, file): + import logging + self.logger = logging.getLogger('myapp') + self.hdlr = logging.FileHandler(file) + self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') + self.hdlr.setFormatter(self.formatter) + self.logger.addHandler(self.hdlr) + self.logger.setLevel(logging.INFO) + + def write(self, message): + self.logger.info(message) + + class DeeGGer(Thread): def __init__(self, format, text, m3u_dir): @@ -82,7 +101,7 @@ class DeeGGer(Thread): self.m3u_file = self.m3u_dir + os.sep + 'deegger_' + self.text.replace('/', '_') + '.' + self.format + '.m3u' self.m3u = M3UPlaylist(self.m3u_file) - self.n = 4 + self.n = 20 self.media_q = 'intitle:"index.of" "parent directory" "size" "last modified" "description" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery -intitle:"last modified"' % (self.format, self.format, self.format) #self.media_q = 'intitle:"index.of" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery' % (self.format, self.format, self.format) self.q = '%s %s' % (self.text, self.media_q) @@ -107,52 +126,47 @@ class DeeGGer(Thread): def run(self): print self.results print len(self.results) - i = 0 parsers = [] media_list = [] for result in self.results: - if result: - parsers.append(UrlMediaParser(self.format, self.text, result)) - try: - list = parsers[i].start() - if list: - i += 1 - self.m3u.put(list) - except: - continue + url = result['unescapedUrl'] + if not url in BLACKLIST: + parser = UrlMediaParser(self.m3u, self.format, self.text, url) + try: + list = parser.start() + + except: + continue self.m3u.close() -class M3UPlaylist: +class M3UPlaylist(object): def __init__(self, m3u_file): self.m3u_file = m3u_file self.m3u = open(self.m3u_file, 'w') - self.init_m3u() - - def init_m3u(self): self.m3u.write('#EXTM3U\n') - self.m3u.flush() + #self.m3u.flush() - def put(self, url_list): - #print url_list - for url in url_list: - info = '#EXTINF:'',%s' % (url +'\n') - self.m3u.write(info) - self.m3u.write(url + '\n') - self.m3u.flush + def put(self, url): + print 'adding : ' + url + info = '#EXTINF:'',%s' % (url +'\n') + self.m3u.write(info) + self.m3u.write(url + '\n') + #self.m3u.flush() def close(self): self.m3u.close() class UrlMediaParser(Thread): - def __init__(self, format, text, results): + def __init__(self, m3u, format, text, url): Thread.__init__(self) + self.m3u = m3u self.format = format self.text = text - self.results = results - self.url = self.results['unescapedUrl'] + self.url = url + def is_in_multiple_case(self, _string, text): return _string in text \ @@ -167,6 +181,7 @@ class UrlMediaParser(Thread): def run(self): media_list = [] if self.url: + print 'deegging : ' + self.url try: data = urllib.urlopen(self.url).read() for line in data.split("\012"): @@ -174,10 +189,9 @@ class UrlMediaParser(Thread): s = re.compile('href=".*\.'+ format + '">').search(line,1) if s: file_name = line[s.start():s.end()].split('"')[1] - print file_name if self.is_in_multiple_case(self.text, file_name) or \ - self.is_in_multiple_case(self.text, self.url): - media_list.append(self.url + file_name) + self.is_in_multiple_case(self.text, self.url): + self.m3u.put(self.url + file_name) except: pass -- 2.39.5