]> git.parisson.com Git - yomguy-tools.git/commitdiff
fix various bug, add blacklist
authoryomguy <yomguy@457c0346-1240-4656-8a5a-9edca8063506>
Sun, 31 Oct 2010 01:20:17 +0000 (01:20 +0000)
committeryomguy <yomguy@457c0346-1240-4656-8a5a-9edca8063506>
Sun, 31 Oct 2010 01:20:17 +0000 (01:20 +0000)
git-svn-id: http://svn.parisson.org/svn/tools/trunk@122 457c0346-1240-4656-8a5a-9edca8063506

various/deegger.py

index a214fd7a9d3e1886b7614783f38c50733a9a413e..3137d0ce8bf89c588b413510391ce14dd1a773b6 100755 (executable)
@@ -70,6 +70,25 @@ def prog_info():
  """ % version
 
 
+BLACKLIST = ['http://www.mobzy.us/',]
+
+
+class Logger:
+    """A logging object"""
+    
+    def __init__(self, file):
+        import logging
+        self.logger = logging.getLogger('myapp')
+        self.hdlr = logging.FileHandler(file)
+        self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
+        self.hdlr.setFormatter(self.formatter)
+        self.logger.addHandler(self.hdlr)
+        self.logger.setLevel(logging.INFO)
+
+    def write(self, message):
+        self.logger.info(message)
+
+
 class DeeGGer(Thread):
 
     def __init__(self, format, text, m3u_dir):
@@ -82,7 +101,7 @@ class DeeGGer(Thread):
         self.m3u_file = self.m3u_dir + os.sep + 'deegger_' + self.text.replace('/', '_') + '.' + self.format + '.m3u'
         self.m3u = M3UPlaylist(self.m3u_file)
 
-        self.n = 4
+        self.n = 20
         self.media_q = 'intitle:"index.of" "parent directory" "size" "last modified" "description" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery -intitle:"last modified"' % (self.format, self.format, self.format)
         #self.media_q = 'intitle:"index.of" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery' % (self.format, self.format, self.format)
         self.q = '%s %s' % (self.text, self.media_q)
@@ -107,52 +126,47 @@ class DeeGGer(Thread):
     def run(self):
         print self.results
         print len(self.results)
-        i = 0
         parsers = []
         media_list = []
         for result in self.results:
-            if result:
-                parsers.append(UrlMediaParser(self.format, self.text, result))
-                try:
-                    list = parsers[i].start()
-                    if list:
-                        i += 1
-                    self.m3u.put(list)
-                except:
-                    continue
+            url = result['unescapedUrl']
+            if not url in BLACKLIST:
+                parser = UrlMediaParser(self.m3u, self.format, self.text, url)
+            try:
+                list = parser.start()
+                
+            except:
+                continue
         self.m3u.close()
 
 
-class M3UPlaylist:
+class M3UPlaylist(object):
 
     def __init__(self, m3u_file):
         self.m3u_file = m3u_file
         self.m3u = open(self.m3u_file, 'w')
-        self.init_m3u()
-
-    def init_m3u(self):
         self.m3u.write('#EXTM3U\n')
-        self.m3u.flush()
+        #self.m3u.flush()
 
-    def put(self, url_list):
-        #print url_list
-        for url in url_list:
-            info = '#EXTINF:'',%s' % (url +'\n')
-            self.m3u.write(info)
-            self.m3u.write(url + '\n')
-            self.m3u.flush
+    def put(self, url):
+        print 'adding : ' + url
+        info = '#EXTINF:'',%s' % (url +'\n')
+        self.m3u.write(info)
+        self.m3u.write(url + '\n')
+        #self.m3u.flush()
 
     def close(self):
         self.m3u.close()
 
 class UrlMediaParser(Thread):
 
-    def __init__(self, format, text, results):
+    def __init__(self, m3u, format, text, url):
         Thread.__init__(self)
+        self.m3u = m3u
         self.format = format
         self.text = text
-        self.results = results
-        self.url = self.results['unescapedUrl']
+        self.url = url
+        
 
     def is_in_multiple_case(self, _string, text):
         return _string in text \
@@ -167,6 +181,7 @@ class UrlMediaParser(Thread):
     def run(self):
         media_list = []
         if self.url:
+            print 'deegging : ' + self.url
             try:
                 data = urllib.urlopen(self.url).read()
                 for line in data.split("\012"):
@@ -174,10 +189,9 @@ class UrlMediaParser(Thread):
                         s = re.compile('href=".*\.'+ format + '">').search(line,1)
                         if s:
                             file_name = line[s.start():s.end()].split('"')[1]
-                            print file_name
                             if self.is_in_multiple_case(self.text, file_name) or \
-                               self.is_in_multiple_case(self.text, self.url):
-                                media_list.append(self.url + file_name)
+                                    self.is_in_multiple_case(self.text, self.url):
+                                self.m3u.put(self.url + file_name)
             except:
                 pass