From: yomguy
Date: Fri, 1 May 2009 23:55:13 +0000 (+0000)
Subject: fix url, add simple url parsing
X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=bffdb93181fa386f5fbff2449e248a20d9e9ed9a;p=tools.git

fix url, add simple url parsing

git-svn-id: http://svn.parisson.org/svn/tools/trunk@63 457c0346-1240-4656-8a5a-9edca8063506
---

diff --git a/various/deegger.py b/various/deegger.py
index f48778f..f5d9d89 100755
--- a/various/deegger.py
+++ b/various/deegger.py
@@ -79,11 +79,12 @@ class DeeGGer(Thread):
         self.m3u_dir = m3u_dir
         if not os.path.exists(self.m3u_dir):
             os.makedirs(self.m3u_dir)
-        self.m3u_file = self.m3u_dir + os.sep + 'deegger_' + self.text + '.' + self.format + '.m3u'
+        self.m3u_file = self.m3u_dir + os.sep + 'deegger_' + self.text.replace('/', '_') + '.' + self.format + '.m3u'
         self.m3u = M3UPlaylist(self.m3u_file)

         self.n = range(0,128)
-        self.media_q = 'intitle:"index.of" "parent directory" "size" "last modified" "description" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery -intitle:"last modified"' % (self.format, self.format, self.format)
+        #self.media_q = 'intitle:"index.of" "parent directory" "size" "last modified" "description" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery -intitle:"last modified"' % (self.format, self.format, self.format)
+        self.media_q = 'intitle:"index.of" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery' % (self.format, self.format, self.format)
         self.q = '%s %s' % (self.text, self.media_q)
         self.results = self.google_search()

@@ -127,6 +128,8 @@ class M3UPlaylist:
             self.m3u.write(url + '\n')
         self.m3u.flush

+    def close(self):
+        self.m3u.close()

 class UrlMediaParser(Thread):

@@ -136,6 +139,7 @@ class UrlMediaParser(Thread):
         self.text = text
         self.results = results
         self.m3u = m3u
+        self.url = self.results['unescapedUrl']

     def is_in_multiple_case(self, _string, text):
         return _string in text \
@@ -149,24 +153,24 @@ class UrlMediaParser(Thread):

     def run(self):
         media_list = []
-        url = self.results['unescapedUrl']
-        if url:
+        if self.url:
             try:
-                data = urllib.urlopen(url).read()
+                data = urllib.urlopen(self.url).read()
                 for line in data.split("\012"):
                     for format in self.get_multiple_case_string(self.format):
-                        s = re.compile('HREF=".*\.'+ format + '">').search(line,1)
+                        s = re.compile('href=".*\.'+ format + '">').search(line,1)
                         if s:
                             file_name = line[s.start():s.end()].split('"')[1]
-                            if self.is_in_multiple_case(self.text, file_name) \
-                            or self.is_in_multiple_case(self.text, url):
-                                media_list.append(url + file_name)
+                            if self.is_in_multiple_case(self.text, file_name) or \
+                                self.is_in_multiple_case(self.text, self.url):
+                                media_list.append(self.url + file_name)
             except:
                 pass

         if media_list:
             #print media_list
             self.m3u.put(media_list)
+            self.m3u.close()


 def main():
diff --git a/various/deegger_url.py b/various/deegger_url.py
new file mode 100755
index 0000000..066795d
--- /dev/null
+++ b/various/deegger_url.py
@@ -0,0 +1,14 @@
+#!/usr/bin/python
+
+import sys
+from deegger import *
+
+format = sys.argv[1]
+text = sys.argv[2]
+url = sys.argv[3]
+m3u_dir = sys.argv[4]
+m3u = M3UPlaylist(m3u_dir + os.sep + 'deegger_' + url[7:].replace('/', '_') + '.m3u')
+
+u = UrlMediaParser(format, text, {'unescapedUrl': url}, m3u)
+u.start()
+ 
\ No newline at end of file