An open source, stand-alone, customisable public spending data web app.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

import-sutton.rb 3.0 KiB

9 vuotta sitten
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. require './models'
  2. require 'fastercsv'
  3. # Before running this script with a CSV file, prepare it so:
  4. # - There is only a single line of column headings on the first line of the file
  5. # - There are no spaces before or after the column headings
  6. # - The column headings correspond with the key names in the columns{} hash below
  7. # - The data starts on line 2
  # Build a URL-style slug from a display name.
  #
  # Steps, in order:
  # 1. drop every character that is not a word character, whitespace or a dash
  # 2. turn each run of whitespace into a single dash
  # 3. lower-case the result
  # 4. replace each run of exactly three dashes (what " - " becomes in
  #    step 2) with a single dash
  #
  # name - the String to convert.
  #
  # Returns the slug as a new String.
  def slugify(name)
    cleaned = name.gsub(/[^\w\s-]/, '')
    dashed = cleaned.gsub(/\s+/, '-')
    lowered = dashed.downcase
    lowered.gsub(/---/, '-')
  end
  # Month abbreviations arranged so that Array#index gives the calendar month
  # number: the placeholder in slot 0 keeps 'Jan' at index 1.
  months = ['dummy', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

  # Column heading => position of that column within a CSV row. Every position
  # starts out as nil and is filled in from the heading line of the file.
  columns = {}
  ['Directorate', 'Updated', 'Service', 'Supplier', 'Amount', 'Transaction Number'].each do |heading|
    columns[heading] = nil
  end

  # Optional renaming tables. Each entry is a two-element array:
  # [name as it appears in the CSV, name to use instead].
  directorate_replacements = []
  service_replacements = []

  # Number of CSV lines read so far.
  count = 0

  # The CSV filename is the first command-line argument; without it there is
  # nothing to import, so print usage and stop.
  unless ARGV[0]
    puts "Specify the filename of the CSV file to import on the command line"
    exit
  end
  # Walk the CSV file named on the command line. The first line supplies the
  # column positions; every later line becomes one Payment, with its
  # Directorate, Service and Supplier looked up or created on the way.
  FasterCSV.foreach(ARGV[0]) do |row|
    count += 1

    # First line: it holds the column headings rather than data, so record
    # the position of each heading and move on to the next line.
    unless count > 1
      row.each_with_index do |heading, index|
        columns[heading] = index
      end
      puts columns.inspect
      next
    end

    p row

    # Tidy a raw cell: trim surrounding whitespace and spell out ampersands.
    tidy = lambda { |cell| cell.strip.gsub(/&/, "and") }

    # Only the directorate cell is allowed to be missing; the service and
    # supplier cells are used unconditionally.
    directorate_cell = row[columns['Directorate']]
    directorate_name = directorate_cell.nil? ? nil : tidy.call(directorate_cell)
    service_name = tidy.call(row[columns['Service']])
    supplier_name = tidy.call(row[columns['Supplier']])

    # Apply the renaming tables in order; a later entry sees the result of an
    # earlier one.
    directorate_replacements.each do |from, to|
      directorate_name = to if directorate_name == from
    end
    service_replacements.each do |from, to|
      service_name = to if service_name == from
    end

    # A row without a directorate still gets a service, attached to no
    # directorate.
    directorate = nil
    unless directorate_name.nil?
      directorate = Directorate.first_or_create(:name => directorate_name, :slug => slugify(directorate_name))
      directorate.save
    end

    service = Service.first_or_create(:name => service_name, :directorate => directorate, :slug => slugify(service_name))
    service.save

    supplier = Supplier.first_or_create(:name => supplier_name, :slug => slugify(supplier_name))
    supplier.save

    # The 'Updated' cell is read as "<day> <month abbreviation> <year>".
    day, month_name, year = row[columns['Updated']].strip.split(' ')
    d = Date.new(year.to_i, months.index(month_name), day.to_i)

    # Thousands separators are removed from the amount before it is stored.
    amount = row[columns['Amount']].strip.gsub(/,/, '')
    transaction_id = row[columns['Transaction Number']].strip.to_i

    # Using Payment.new rather than Payment.first_or_new allows us to create genuine duplicates
    # so don't run the importer more than once with the same set of data
    payment = Payment.new(
      'service' => service,
      'supplier' => supplier,
      'amount' => amount,
      'd' => d,
      'transaction_id' => transaction_id
    )

    # save runs callbacks/hooks, save! doesn't
    next if payment.save

    puts "ERROR: Failed to save payment"
    payment.errors.each { |e| puts e }
  end