Because of my debate, I found a duplication bug in Tweet to Text, which led me to post the fix here.
diff --git a/track.py b/track.py
index ea27c2f..e651f51 100644
--- a/track.py
+++ b/track.py
@@ -65,7 +65,9 @@ class App:
         query = ' '.join(self.args)
         results = search(query)
         for i in results['results']:
-            history.append(i)
+            if i['id'] not in keys:
+                history.append(i)
+                keys[i['id']] = 1
 
         for p in range(self.options.pages-1):
             if 'next_page' not in results:
@@ -73,7 +75,9 @@ class App:
             next_page = results['next_page']
             results = search(query, next_page[1:])
             for i in results['results']:
-                history.append(i)
+                if i['id'] not in keys:
+                    history.append(i)
+                    keys[i['id']] = 1
After patching:
#!/usr/bin/env python
"""Search Twitter for a query and print the matching tweets as plain text.

The command-line arguments are joined into one query string. Up to
``--pages`` result pages are fetched, tweets whose ``id`` was already seen
are dropped, and the remaining tweets are printed in the reverse of the
order in which they were received.
"""
try:  # Python 2 module layout
    from urllib import urlencode
    from urllib2 import (urlopen, HTTPCookieProcessor, build_opener,
                         install_opener, Request)
except ImportError:  # Python 3 module layout
    from urllib.parse import urlencode
    from urllib.request import (urlopen, HTTPCookieProcessor, build_opener,
                                install_opener, Request)
from contextlib import closing
from optparse import OptionParser
import time
from datetime import datetime

try:
    import simplejson
except ImportError:
    # The stdlib json module offers the same loads() used below.
    import json as simplejson

# Install a cookie-aware opener so every urlopen() call shares cookies.
cookie_processor = HTTPCookieProcessor()
opener = build_opener(cookie_processor)
install_opener(opener)

api_url = 'http://search.twitter.com/search.json'
# Module defaults; App.__init__ overwrites both from the command line.
results_per_page = 100
verbose = False


def search(q, data=None):
    """Send one search request and return the decoded JSON response.

    q    -- query string; used only when ``data`` is empty.
    data -- pre-encoded request parameters (for example the ``next_page``
            value of a previous response without its first character).
            When empty, parameters are built from ``q`` and the module-level
            ``results_per_page``.

    The parameters are passed as the request body, exactly as before.
    """
    if not data:
        data = urlencode({'q': q, 'rpp': results_per_page})
    if verbose:
        print(data)
    # Request bodies must be bytes on Python 3; on Python 2 ``bytes`` is
    # ``str``, so only unicode values are re-encoded there.
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    req = Request(api_url, data)
    # closing() guarantees the connection is released even if decoding fails.
    with closing(urlopen(req)) as fd:
        return simplejson.loads(fd.read().decode('utf-8'))


def unescape(t):
    """Return ``t`` with ``&quot;`` and ``&amp;`` entities decoded."""
    t = t.replace('&quot;', '"')
    # '&amp;' is decoded last so that '&amp;quot;' becomes the literal text
    # '&quot;' instead of being unescaped twice.
    t = t.replace('&amp;', '&')
    return t


def format_tweet(t):
    """Format one tweet dict as ``(HH:MM) user: text``.

    Reads the ``created_at``, ``from_user`` and ``text`` keys of ``t``.
    """
    # Drop the last six characters of the timestamp before parsing
    # (presumably a ' +0000' UTC offset -- confirm against the API).
    parsed = time.strptime(t['created_at'][:-6], '%a, %d %b %Y %H:%M:%S')
    # mktime() treats the parsed value as local time; subtracting
    # time.timezone shifts the result by the local (non-DST) UTC offset.
    created_at = datetime.fromtimestamp(time.mktime(parsed) - time.timezone)
    return '(%s) %s: %s' % (created_at.strftime('%H:%M'),
                            t['from_user'],
                            unescape(t['text']))


class App(object):
    """Command-line front end: parse options, fetch pages, print tweets."""

    def __init__(self):
        """Parse ``sys.argv`` and publish the chosen settings as globals."""
        parser = OptionParser()
        parser.add_option('-v', '--verbose', default=False,
                          action='store_true', dest='verbose',
                          help='verbose')
        parser.add_option('-r', '--rpp', default=20, type='int',
                          dest='rpp', help='results per page')
        parser.add_option('-p', '--pages', default=4, type='int',
                          dest='pages', help='max pages')
        self.options, self.args = parser.parse_args()

        # search() reads these module-level names, so they are updated here.
        global verbose, results_per_page
        verbose = self.options.verbose
        results_per_page = self.options.rpp

    @staticmethod
    def _add_unseen(tweets, history, seen):
        """Append to ``history`` each tweet whose ``id`` is not in ``seen``.

        ``seen`` is updated in place, so repeated calls deduplicate across
        result pages as well as within one page.
        """
        for tweet in tweets:
            tweet_id = tweet['id']
            if tweet_id not in seen:
                seen.add(tweet_id)
                history.append(tweet)

    def run(self):
        """Fetch up to ``options.pages`` pages and print the unique tweets."""
        seen = set()
        history = []
        query = ' '.join(self.args)

        results = search(query)
        self._add_unseen(results['results'], history, seen)

        for _ in range(self.options.pages - 1):
            if 'next_page' not in results:
                break
            # Skip the first character of next_page before reusing it as
            # the request parameters (presumably a leading '?').
            results = search(query, results['next_page'][1:])
            self._add_unseen(results['results'], history, seen)

        # Print in the reverse of the order in which tweets were received.
        history.reverse()
        for tweet in history:
            print(format_tweet(tweet))
        if verbose:
            print(len(history))


if __name__ == '__main__':
    app = App()
    app.run()
I love git!
Post new comment