import sys
import urllib
import simplejson
+import Queue
from threading import Thread
+from optparse import OptionParser
-version = '0.2'
+version = '0.3'
def prog_info():
return """ deegger : easy media crawler through google search api
""" % version
-BLACKLIST = ['http://www.mobzy.us/',]
+class DeeGGer(object):
-
-class Logger:
- """A logging object"""
-
- def __init__(self, file):
- import logging
- self.logger = logging.getLogger('myapp')
- self.hdlr = logging.FileHandler(file)
- self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
- self.hdlr.setFormatter(self.formatter)
- self.logger.addHandler(self.hdlr)
- self.logger.setLevel(logging.INFO)
-
- def write(self, message):
- self.logger.info(message)
-
-
-class DeeGGer(Thread):
-
- def __init__(self, format, text, m3u_dir):
- Thread.__init__(self)
- self.format = format
- self.text = text
- self.m3u_dir = m3u_dir
+ def __init__(self, options, args):
+ self.format = options.format
+ self.text = options.text
+ self.m3u_dir = options.output
if not os.path.exists(self.m3u_dir):
os.makedirs(self.m3u_dir)
self.m3u_file = self.m3u_dir + os.sep + 'deegger_' + self.text.replace('/', '_') + '.' + self.format + '.m3u'
- self.m3u = M3UPlaylist(self.m3u_file)
-
- self.n = 20
+ self.range = 4
+ self.servers = []
+
self.media_q = 'intitle:"index.of" "parent directory" "size" "last modified" "description" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery -intitle:"last modified"' % (self.format, self.format, self.format)
#self.media_q = 'intitle:"index.of" [snd] (%s) -inurl:(jsp|php|html|aspx|htm|cf|shtml|lyrics|index|%s|%ss) -gallery' % (self.format, self.format, self.format)
- self.q = '%s %s' % (self.text, self.media_q)
- self.results = self.google_search()
+
+ self.query = '%s %s' % (self.text, self.media_q)
+ self.q = Queue.Queue(1)
+ self.results = Queue.Queue(1)
- def google_search(self):
+ def run(self):
+ g = GoogleSearch(self.range, self.query)
+ self.results = g.search()
+# print self.results
+ for result in self.results:
+ url = result['url']
+ s = UrlMediaParser(self.format, self.text, url, self.q)
+ s.start()
+
+ self.m3u = M3UPlaylist(self.q, self.m3u_file)
+ self.m3u.start()
+
+ self.q.join()
+ self.m3u.close()
+
+class Producer(Thread):
+ """a Producer master thread"""
+
+ def __init__(self, q):
+ Thread.__init__(self)
+ self.q = q
+
+ def run(self):
+ i=0
+ q = self.q
+ while True:
+ q.put(i,1)
+ i+=1
+
+class GoogleSearch(object):
+
+ def __init__(self, range, query):
+ self.range = range
+ self.query = query
+
+ def search(self):
results = []
- for j in range(0,self.n):
- page = str(j*4)
- query = urllib.urlencode({'q' : self.q, 'start': page})
+ for j in range(0, self.range):
+ page = str(j)
+ query = urllib.urlencode({'q' : self.query, 'start': page})
url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % (query)
json = simplejson.loads(urllib.urlopen(url).read())
- #print json
if json['responseData']:
for r in json['responseData']['results']:
results.append(r)
- #except:
- #pass
- ##print "ERROR"
return results
- def run(self):
- #print self.results
- #print len(self.results)
- parsers = []
- media_list = []
- for result in self.results:
- url = result['unescapedUrl']
- if not url in BLACKLIST:
- parser = UrlMediaParser(self.m3u, self.format, self.text, url)
- try:
- list = parser.start()
-
- except:
- continue
- self.m3u.close()
-
-
-class M3UPlaylist(object):
-
- def __init__(self, m3u_file):
- self.m3u_file = m3u_file
- self.m3u = open(self.m3u_file, 'w')
- self.m3u.write('#EXTM3U\n')
- #self.m3u.flush()
-
- def put(self, url):
- print 'adding : ' + url
- info = '#EXTINF:'',%s' % (url +'\n')
- self.m3u.write(info)
- self.m3u.write(url + '\n')
- #self.m3u.flush()
-
- def close(self):
- self.m3u.close()
class UrlMediaParser(Thread):
- def __init__(self, m3u, format, text, url):
+ def __init__(self, format, text, url, q):
Thread.__init__(self)
- self.m3u = m3u
self.format = format
self.text = text
self.url = url
-
+ self.q = q
def is_in_multiple_case(self, _string, text):
return _string in text \
def run(self):
+ q = self.q
media_list = []
if self.url:
print 'deegging : ' + self.url
try:
data = urllib.urlopen(self.url).read()
for line in data.split("\012"):
- for format in self.get_multiple_case_string(self.format):
- s = re.compile('href=".*\.'+ format + '">').search(line,1)
- if s:
- file_name = line[s.start():s.end()].split('"')[1]
- if self.is_in_multiple_case(self.text, file_name) or \
- self.is_in_multiple_case(self.text, self.url):
- self.m3u.put(self.url + file_name)
+ s = re.compile('href=".*\.'+ format + '"').search(line,1)
+ if s:
+ file_name = line[s.start():s.end()].split('"')[1]
+ if self.is_in_multiple_case(self.text, file_name):
+ q.put(self.url + '/' + file_name)
+
except:
pass
- if media_list:
- return media_list
+
+class M3UPlaylist(Thread):
+
+ def __init__(self, q, m3u_file):
+ Thread.__init__(self)
+ self.q = q
+ self.m3u_file = m3u_file
+ self.m3u = open(self.m3u_file, 'w')
+ self.m3u.write('#EXTM3U\n')
+ #self.m3u.flush()
+
+ def run(self):
+ url = self.q.get()
+ print 'adding : ' + url
+ info = '#EXTINF:'',%s' % (url +'\n')
+ self.m3u.write(info)
+ self.m3u.write(url + '\n')
+ #self.m3u.flush()
+
+ def close(self):
+ self.m3u.close()
+
+
+class Logger:
+ """A logging object"""
+
+ def __init__(self, file):
+ import logging
+ self.logger = logging.getLogger('myapp')
+ self.hdlr = logging.FileHandler(file)
+ self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
+ self.hdlr.setFormatter(self.formatter)
+ self.logger.addHandler(self.hdlr)
+ self.logger.setLevel(logging.INFO)
+
+ def write(self, message):
+ self.logger.info(message)
+
def main():
- if len(sys.argv) == 4:
- d = DeeGGer(sys.argv[1], sys.argv[2], sys.argv[3])
- d.start()
- else:
+ if len(sys.argv) <= 2:
text = prog_info()
sys.exit(text)
-
+
+ else:
+ parser = OptionParser()
+ parser.add_option("-t", "--text", dest="text", help="set the TEXT google query", metavar="TEXT")
+ parser.add_option("-f", "--format", dest="format", help="set the format to search for" , metavar="FORMAT")
+ parser.add_option("-o", "--output", dest="output", help="set the output directory" , metavar="OUTPUT")
+ (options, args) = parser.parse_args()
+
+ d = DeeGGer(options, args)
+ d.run()
+
if __name__ == '__main__':
main()
# @deefuzz_test3
-key='146002442-qgtArE6YrpLfL6h51LnE5TA9skcKhOqDraNDaOY'
-secret='8RWoZjllOv52PUmXbLJcu5qunY8qAa6V6pyLGBHEcg'
+test_key='146002442-qgtArE6YrpLfL6h51LnE5TA9skcKhOqDraNDaOY'
+test_secret='8RWoZjllOv52PUmXbLJcu5qunY8qAa6V6pyLGBHEcg'
# @parisson_studio
-#key='223431436-8uYqGM0tLHBiMbk6Bt39oBfwXpylfLcr7t6bs311'
-#secret='SzWD3fDgBpw9qwNNrYarXTcRJSTklp0PpKXg7Iw'
+ps_key='223431436-8uYqGM0tLHBiMbk6Bt39oBfwXpylfLcr7t6bs311'
+ps_secret='SzWD3fDgBpw9qwNNrYarXTcRJSTklp0PpKXg7Iw'
+
+# @parisson_com
+pc_key='241046394-MpI5YrkgHSjW0Ab4WIlU0nJruGqesLueCWDJ1qtx'
+pc_secret='6gRzqDvqkjhRzFCfetdWfZYPQdbvQQhVEhhGHQ90JCM'
# Twitter DeeFuzzer keys
DEEFUZZER_CONSUMER_KEY = 'ozs9cPS2ci6eYQzzMSTb4g'
DEEFUZZER_CONSUMER_SECRET = '1kNEffHgGSXO2gMNTr8HRum5s2ofx3VQnJyfd0es'
+escape = ['parisson_studio', 'parisson_com', 'kvraudio']
-class Twitter:
+class Twitter(object):
def __init__(self, access_token_key, access_token_secret):
import twitter
consumer_secret=self.consumer_secret,
access_token_key=self.access_token_key,
access_token_secret=self.access_token_secret)
- self.followers = self.api.GetFollowers()
- self.friends = self.api.GetFriends()
+ self.followers = self.get_followers()
+ self.friends = self.get_friends()
def post(self, message):
try:
except:
pass
- def print_followers(self):
- print str(len(self.followers)) + ' Followers:'
- for f in self.followers:
- print ' ' + f.screen_name
+ def get_friends(self):
+ l = []
+ for f in self.api.GetFriends():
+ l.append(f.screen_name)
+ return l
+
+ def get_followers(self):
+ l = []
+ for f in self.api.GetFollowers():
+ l.append(f.screen_name)
+ return l
def send_private_mess(self, mess, tags):
for f in self.followers:
- self.api.PostDirectMessage(f.screen_name, mess + ' #' + (' #').join(tags))
+ self.api.PostDirectMessage(f, mess + ' #' + (' #').join(tags))
- def print_friends(self):
- print str(len(self.friends)) + ' Friends:'
- for f in self.friends:
- print ' ' + f.screen_name
-
def send_friends_mess(self, mess, tags):
- for f in self.followers:
- self.post('@' + f.screen_name + ' ' + mess + ' #' + ' #'.join(tags))
-
-
+ mess_header = mess
+ for f in self.friends:
+ if not f in escape:
+ mess = '@' + f + ' ' + mess_header + ' #' + ' #'.join(tags)
+ print mess
+ self.post(mess)
+
+ def add_friends(self, friends):
+ for f in friends:
+ if not f in self.friends and not f in escape:
+ self.api.CreateFriendship(f)
+
if __name__ == '__main__':
- mess = 'Hello World ! TEST ! RVSP'
- tags = ['t35t', 'test', 'TesT']
+ mess = 'TC-202 Case : the mobile media solution now released by Parisson http://bit.ly/gSvqaF'
+ tags = ['proaudio', 'broadcast']
+
+ print ('IN')
+ twitt_in = Twitter(ps_key, ps_secret)
+ print str(len(twitt_in.followers)) + ' Followers:'
+ print twitt_in.followers
+ print str(len(twitt_in.friends)) + ' Friends:'
+ print twitt_in.friends
- twitt = Twitter(key, secret)
+ print ('OUT')
+ twitt_out = Twitter(pc_key, pc_secret)
+ print str(len(twitt_out.followers)) + ' Followers:'
+ print twitt_out.followers
+ print str(len(twitt_out.friends)) + ' Friends:'
+ print twitt_out.friends
- twitt.print_followers()
- twitt.print_friends()
+ #twitt_out.add_friends(twitt_in.friends)
+ #twitt.send_private_mess(mess, tags)
+ twitt_out.send_friends_mess(mess, tags)
- twitt.send_private_mess(mess, tags)
- twitt.send_friends_mess(mess, tags)
+ print 'OK'
\ No newline at end of file