Sugree

skip to navigation

Tweet to Text 3

It seems Python on non-Linux platforms, e.g., Mac and Windows, doesn't handle Unicode output so well. I added an `--encoding` option to specify the target encoding if necessary; the default encoding is 'utf-8'. This patch is against Tweet to Text 2.

diff --git a/track.py b/track.py
index e651f51..1842e61 100644
--- a/track.py
+++ b/track.py
@@ -52,6 +52,9 @@ class App:
         parser.add_option('-p', '--pages',
                           default=4, type='int', dest='pages',
                           help='max pages')
+        parser.add_option('--encoding',
+                          default='utf-8', dest='encoding',
+                          help='output encoding')
 
         self.options, self.args = parser.parse_args()
 
@@ -81,7 +84,8 @@ class App:
 
         history.reverse()
         for i in history:
-            print format_tweet(i)
+            tweet = format_tweet(i)
+            print tweet.encode(self.options.encoding, 'replace')
 
         if verbose:

Full code:

#!/usr/bin/env python
 
from urllib import urlencode
from urllib2 import urlopen, HTTPCookieProcessor, build_opener, install_opener, Request
from optparse import OptionParser
import time
from datetime import datetime
 
import simplejson
 
# Install a global urllib2 opener built around an HTTPCookieProcessor, so the
# urlopen() call in search() goes through it.
cookie_processor = HTTPCookieProcessor()
opener = build_opener(cookie_processor)
install_opener(opener)
 
# Endpoint queried by search().
api_url = 'http://search.twitter.com/search.json'
# Module-level defaults; App.__init__ overwrites both from the command line.
results_per_page = 100
verbose = False
 
def search(q, data=None):
    """Send one request to the search API and return the decoded JSON.

    q    -- query string; only used when `data` is not supplied.
    data -- optional pre-encoded request body (run() passes
            `next_page[1:]` from the previous response). When it is empty
            or None, a body is built from `q` and the module-level
            `results_per_page`.

    Returns whatever `simplejson.loads` produces for the response body.
    Errors raised by `urlopen` or by the JSON decoder propagate unchanged.
    """
    if not data:
        data = urlencode({'q': q,
                          'rpp': results_per_page})
    if verbose:
        print(data)
    # Supplying a body makes urllib2 send the request as a POST.
    req = Request(api_url, data)
    fd = urlopen(req)
    try:
        body = fd.read()
    finally:
        # Release the connection even when the read fails.
        fd.close()
    return simplejson.loads(body)
 
def unescape(t):
    """Return `t` with the basic HTML entities decoded.

    Handles `&quot;`, `&lt;`, `&gt;` and `&amp;`. `&amp;` is decoded last so
    that already-escaped text such as `&amp;lt;` is decoded exactly once
    (to `&lt;`) instead of twice.
    """
    for entity, char in (('&quot;', '"'),
                         ('&lt;', '<'),
                         ('&gt;', '>'),
                         ('&amp;', '&')):
        t = t.replace(entity, char)
    return t
 
def format_tweet(t):
    """Render one search result as '(HH:MM) user: text' in local time.

    t -- result dict providing the 'created_at', 'from_user' and 'text'
         keys. 'created_at' is parsed with '%a, %d %b %Y %H:%M:%S' after
         its last six characters are dropped (presumably a ' +0000' UTC
         offset suffix -- confirm against the API output).

    Raises ValueError if 'created_at' does not match that format.
    """
    import calendar  # local import: nothing else in the module needs it

    utc_struct = time.strptime(t['created_at'][:-6],
                               '%a, %d %b %Y %H:%M:%S')
    # timegm() interprets the struct as UTC, so fromtimestamp() applies the
    # real local offset. Subtracting time.timezone from mktime() instead is
    # an hour off whenever daylight saving time is in effect locally.
    created_at = datetime.fromtimestamp(calendar.timegm(utc_struct))
    return '(%s) %s: %s' % (created_at.strftime('%H:%M'),
                            t['from_user'],
                            unescape(t['text']))
 
class App:
    """Command-line front end: parse options, run the search, print tweets."""

    def __init__(self):
        """Parse the command line and publish the shared settings.

        Stores the parsed options in `self.options` and the positional
        arguments (the search terms) in `self.args`, then copies the
        verbose flag and page size into the module-level globals that
        search() and run() read.
        """
        global verbose, results_per_page

        parser = OptionParser()
        parser.add_option('-v', '--verbose',
                          default=False, action='store_true', dest='verbose',
                          help='verbose')
        parser.add_option('-r', '--rpp',
                          default=20, type='int', dest='rpp',
                          help='results per page')
        parser.add_option('-p', '--pages',
                          default=4, type='int', dest='pages',
                          help='max pages')
        parser.add_option('--encoding',
                          default='utf-8', dest='encoding',
                          help='output encoding')

        self.options, self.args = parser.parse_args()

        verbose = self.options.verbose
        results_per_page = self.options.rpp

    def _collect(self, results, seen, history):
        """Append each tweet in `results` whose id is not in `seen` to `history`."""
        for tweet in results['results']:
            if tweet['id'] not in seen:
                seen.add(tweet['id'])
                history.append(tweet)

    def run(self):
        """Fetch up to `options.pages` result pages and print the tweets.

        Tweets are de-duplicated by 'id', printed in the reverse of the
        order they were received, and encoded with `options.encoding`
        (unencodable characters are replaced). In verbose mode the number
        of printed tweets follows.
        """
        seen = set()
        history = []
        query = ' '.join(self.args)

        results = search(query)
        self._collect(results, seen, history)

        # The first page is already fetched; follow at most pages-1 more.
        for _ in range(self.options.pages - 1):
            if 'next_page' not in results:
                break
            # Drop the first character of next_page before reusing it as
            # the request body (presumably a leading '?' -- confirm).
            results = search(query, results['next_page'][1:])
            self._collect(results, seen, history)

        history.reverse()
        for item in history:
            tweet = format_tweet(item)
            print(tweet.encode(self.options.encoding, 'replace'))

        if verbose:
            print(len(history))
 
# Script entry point: parse the command line, then run the search and print.
if __name__ == '__main__':
    app = App()
    app.run()
Submitted by sugree on Thu, 09/04/2008 - 12:39

Post new comment

The content of this field is kept private and will not be shown publicly.
+

Main menu

+++++++++

Popular Tags

debian
apache
drupal
drupal.in.th
feedburner

++++++++++++++++++++++