#!/usr/bin/ruby
#
# search.rb -- back up Twitter search results as tab-separated rows.
#
# Provenance: added as search.rb in commit
# 49978b0cd0036d9e8ab72ce284c91651c6467f5d
# (Author: Adrian Short, Date: Mon Mar 19 08:46:08 2012 -0700).
#
# Usage: search.rb QUERY [SINCE_ID]
#
# Rows are written to stdout; progress and errors are written to stderr.

require 'rubygems'
require 'json'
require 'httpclient'
require 'uri'
require 'time'

# Base URL of the search endpoint queried by #backup.
SEARCH_BASE_URL = 'http://search.twitter.com'

# Seconds to pause between consecutive requests (successful or failed).
REQUEST_DELAY_SECONDS = 10

# Parses an HTTP response body as JSON.
#
# @param body [String, nil] raw response body
# @return [Hash, nil] the decoded JSON object, or nil when the body is not
#   valid JSON or does not decode to an object
def parse_json_object(body)
  parsed = JSON.parse(body)
  parsed.is_a?(Hash) ? parsed : nil
rescue JSON::ParserError, TypeError
  # Error pages are not guaranteed to be JSON; treat them as "no payload"
  # instead of letting the exception abort the whole run.
  nil
end

# Formats one search result as a tab-separated row.
#
# @param tweet [Hash] one element of the response's 'results' array
# @return [String] created_at, id_str, from_user, text, source,
#   from_user_id_str, to_user and to_user_id_str joined by tabs
def tweet_row(tweet)
  [
    Time.parse(tweet['created_at']).to_s,
    tweet['id_str'],
    tweet['from_user'],
    # Newlines and tabs inside the text would break the one-row-per-tweet
    # TSV layout, so each is replaced with a space.
    tweet['text'].to_s.tr("\n\t", ' '),
    tweet['source'],
    tweet['from_user_id_str'],
    tweet['to_user'],
    tweet['to_user_id_str']
  ].join("\t")
end

# Writes whatever error details an error payload carries to stderr.
#
# @param payload [Hash, nil] decoded error response, or nil if undecodable
# @return [void]
def report_api_errors(payload)
  if payload.nil?
    $stderr.puts 'ERROR: response body was not a JSON object'
    return
  end

  Array(payload['errors']).each do |error|
    next unless error.is_a?(Hash)
    $stderr.puts "ERROR code #{error['code']}: #{error['message']}"
  end

  # NOTE(review): some responses may carry a single 'error' string instead
  # of an 'errors' array -- confirm against the API; harmless if absent.
  $stderr.puts "ERROR: #{payload['error']}" if payload['error']
end

# Pages through the search results for +query+ and prints one TSV row per
# tweet to stdout, logging progress to stderr.
#
# Paging stops when a page comes back empty, when the response carries no
# 'next_page' link, or when the server answers HTTP 403. Any other failure
# is retried after REQUEST_DELAY_SECONDS, without an upper bound (as in the
# original script).
#
# @param query [String] search query; it is interpolated into the URL
#   verbatim, so it must already be URL-encoded
# @param since_id [String, nil] when given, sent as the since_id parameter
#   of the first request
# @return [void]
def backup(query, since_id = nil)
  client = HTTPClient.new

  total = 0
  page = 1
  next_page = "?q=#{query}&rpp=100&page=#{page}&result_type=recent"
  next_page += "&since_id=#{since_id}" unless since_id.nil?

  loop do
    $stderr.puts "Trying page #{page}"

    url = "#{SEARCH_BASE_URL}/search.json#{next_page}"
    $stderr.puts url
    response = client.get(url)
    payload = parse_json_object(response.body)

    if response.status_code == 200 && payload
      $stderr.puts "Got page #{page} OK"

      results = Array(payload['results'])
      results.each { |tweet| puts tweet_row(tweet) }

      $stderr.puts "#{results.size} tweets processed"
      total += results.size
      page += 1

      # Follow the server-supplied link for the next page. Without one there
      # is nothing left to fetch; requesting a bare /search.json would only
      # produce errors that are then retried forever.
      next_page = payload['next_page']
      break if results.empty? || next_page.nil?
    else
      # Some kind of error
      $stderr.puts "HTTP #{response.status_code} status code"
      report_api_errors(payload)

      break if response.status_code == 403
    end

    # Pause between requests; reached only when another request will follow.
    sleep(REQUEST_DELAY_SECONDS)
  end

  $stderr.puts "#{total} tweets collected"
end

query = ARGV.shift
abort "Usage: #{$PROGRAM_NAME} QUERY [SINCE_ID]" if query.nil? || query.empty?

backup(query, ARGV.shift)