|
- #!/usr/bin/ruby
-
- require 'rubygems'
- require 'json'
- require 'httpclient'
- require 'uri'
- require 'time'
-
# Formats one search result hash as a single tab-separated line.
#
# @param tweet [Hash] one entry of the response's 'results' array
# @return [String] eight tab-separated fields; nil fields become empty strings
def tweet_row(tweet)
  [
    Time.parse(tweet['created_at']).to_s,
    tweet['id_str'],
    tweet['from_user'],
    # Newlines and tabs inside the text would break the one-line TSV layout.
    tweet['text'].tr("\n\t", ' '),
    tweet['source'],
    tweet['from_user_id_str'],
    tweet['to_user'],
    tweet['to_user_id_str']
  ].join("\t")
end

# Writes every entry of an error response's 'errors' list to $stderr.
# Bodies that are not JSON, or that carry no 'errors' list, are ignored so a
# plain-text/HTML error page cannot crash the run.
#
# @param body [String] raw body of a non-200 response
# @return [void]
def log_api_errors(body)
  payload = begin
    JSON.parse(body)
  rescue JSON::ParserError
    nil
  end
  return unless payload.is_a?(Hash)

  errors = payload['errors']
  return unless errors.is_a?(Array)

  errors.each do |error|
    next unless error.is_a?(Hash)

    $stderr.puts "ERROR code #{error['code']}: #{error['message']}"
  end
end

# Pages through search.json for +query+ and prints every result to $stdout as
# one tab-separated line (see tweet_row). Progress and errors go to $stderr.
#
# Paging follows the 'next_page' query string returned with each page and
# stops when a page has no results, when the response carries no 'next_page',
# or when the server answers 403. Any other non-200 status is retried after
# the delay; there is no retry limit.
#
# NOTE: +query+ and +since_id+ are interpolated into the URL verbatim, so the
# caller must pass them already URL-encoded.
#
# @param query [String] search query, already URL-encoded
# @param since_id [String, nil] when given, sent as the since_id parameter of
#   the first request
# @return [nil]
def backup(query, since_id = nil)
  client = HTTPClient.new
  endpoint = 'http://search.twitter.com/search.json'

  total = 0
  page = 1
  next_page = "?q=#{query}&rpp=100&page=#{page}&result_type=recent"
  next_page += "&since_id=#{since_id}" if since_id

  loop do
    $stderr.puts "Trying page #{page}"

    url = "#{endpoint}#{next_page}"
    $stderr.puts url
    response = client.get(url)

    if response.status_code == 200
      $stderr.puts "Got page #{page} OK"

      # Parse only after the status check: error bodies are not always JSON.
      json = JSON.parse(response.body)
      results = json['results'] || []
      results.each { |tweet| puts tweet_row(tweet) }

      $stderr.puts "#{results.size} tweets processed"
      total += results.size
      page += 1

      # Without a 'next_page' the following request would carry no query at
      # all, so a missing value ends the run just like an empty page does.
      next_page = json['next_page']
      break if results.empty? || next_page.nil?
    else
      $stderr.puts "HTTP #{response.status_code} status code"
      log_api_errors(response.body)

      break if response.status_code == 403
    end

    # Pause between requests (and before retrying a failed one).
    sleep(10)
  end

  $stderr.puts "#{total} tweets collected"
end
-
-
# Command-line entry point: the first argument is the search query and the
# optional second argument is passed to backup as since_id. Without a query
# the script would send a request with an empty q, so it exits with a usage
# message instead.
query = ARGV.shift
abort "Usage: #{$PROGRAM_NAME} QUERY [SINCE_ID]" if query.nil? || query.empty?

backup(query, ARGV.shift)
|