]> git.parisson.com Git - yomguy-tools.git/commitdiff
fix url, add simple url parsing
author yomguy <yomguy@457c0346-1240-4656-8a5a-9edca8063506>
Fri, 1 May 2009 23:55:13 +0000 (23:55 +0000)
committer yomguy <yomguy@457c0346-1240-4656-8a5a-9edca8063506>
Fri, 1 May 2009 23:55:13 +0000 (23:55 +0000)
git-svn-id: http://svn.parisson.org/svn/tools/trunk@63 457c0346-1240-4656-8a5a-9edca8063506

various/deegger.py
various/deegger_url.py [new file with mode: 0755]

diff --git a/various/deegger.py b/various/deegger.py
index f48778ff2ecff82b79d6d5de3b5e025239ef92c4..f5d9d89ae051c31b681fd7d3168395e45ad85541 100755 (executable)
@@ -79,11 +79,12 @@ class DeeGGer(Thread):
         self.m3u_dir = m3u_dir
         if not os.path.exists(self.m3u_dir):
             os.makedirs(self.m3u_dir)
-        self.m3u_file = self.m3u_dir + os.sep + 'deegger_' + self.text + '.' + self.format + '.m3u'
+        self.m3u_file = self.m3u_dir + os.sep + 'deegger_' + self.text.replace('/', '_') + '.' + self.format + '.m3u'
         self.m3u = M3UPlaylist(self.m3u_file)
              
         self.n = range(0,128)
-        self.media_q = 'intitle:"index.of" "parent directory" "size" "last modified" "description" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery -intitle:"last modified"' % (self.format, self.format, self.format)
+        #self.media_q = 'intitle:"index.of" "parent directory" "size" "last modified" "description" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery -intitle:"last modified"' % (self.format, self.format, self.format)
+        self.media_q = 'intitle:"index.of" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery' % (self.format, self.format, self.format)
         self.q = '%s %s' % (self.text, self.media_q)
         self.results = self.google_search()
 
@@ -127,6 +128,8 @@ class M3UPlaylist:
             self.m3u.write(url + '\n')
             self.m3u.flush
 
+    def close(self):
+        self.m3u.close()
 
 class UrlMediaParser(Thread):
 
@@ -136,6 +139,7 @@ class UrlMediaParser(Thread):
         self.text = text
         self.results = results
         self.m3u = m3u
+        self.url = self.results['unescapedUrl']
 
     def is_in_multiple_case(self, _string, text):
         return _string in text \
@@ -149,24 +153,24 @@ class UrlMediaParser(Thread):
 
     def run(self):
         media_list = []
-        url = self.results['unescapedUrl']
-        if url:
+        if self.url:
             try:
-                data = urllib.urlopen(url).read()
+                data = urllib.urlopen(self.url).read()
                 for line in data.split("\012"):
                     for format in self.get_multiple_case_string(self.format):
-                        s = re.compile('HREF=".*\.'+ format + '">').search(line,1)
+                        s = re.compile('href=".*\.'+ format + '">').search(line,1)
                         if s:
                             file_name = line[s.start():s.end()].split('"')[1]
-                            if self.is_in_multiple_case(self.text, file_name) \
-                                or self.is_in_multiple_case(self.text, url):
-                                media_list.append(url + file_name)
+                            if self.is_in_multiple_case(self.text, file_name) or \
+                               self.is_in_multiple_case(self.text, self.url):
+                                media_list.append(self.url + file_name)
             except:
                 pass
                                           
             if media_list:
                 #print media_list
                 self.m3u.put(media_list)
+                self.m3u.close()
 
 
 def main():
diff --git a/various/deegger_url.py b/various/deegger_url.py
new file mode 100755 (executable)
index 0000000..066795d
--- /dev/null
@@ -0,0 +1,14 @@
+#!/usr/bin/python
+
+import sys
+from deegger import *
+
+format = sys.argv[1]
+text = sys.argv[2]
+url = sys.argv[3]
+m3u_dir = sys.argv[4]
+m3u = M3UPlaylist(m3u_dir + os.sep + 'deegger_' + url[7:].replace('/', '_') + '.m3u')
+
+u = UrlMediaParser(format, text, {'unescapedUrl': url}, m3u)
+u.start()
+    
\ No newline at end of file