An open source, stand-alone, customisable public spending data web app.
 
 
 
 

200 lines
5.6 KiB

  1. require 'lib/models'
  2. require 'fastercsv'
  3. # Before running this script with a CSV file, prepare it so:
  4. # - There is only a single line of column headings on the first line of the file
  5. # - There are no spaces before or after the column headings
  6. # - The column headings correspond with the key names in the columns{} hash below
  7. # - The data starts on line 2
  8. def slugify(name)
  9. output = name.gsub(/[^\w\s-]/, '').gsub(/\s+/, '-').downcase
  10. output.gsub(/---/, '-')
  11. end
  12. columns = ()
  13. directorate_column = nil
  14. service_name_column = nil
  15. vendor_name_column = nil
  16. date_column = nil
  17. directorate_replacements =
  18. [
  19. [ "Childrens Services", "Children's Services" ],
  20. [ "Policy,Performance and Planning", "Policy, Performance and Planning" ]
  21. ]
  22. service_replacements =
  23. [
  24. [ "Corporate Performance and Developmt", "Corporate Performance and Development" ],
  25. [ "ISB", "ISB - Individual Schools Budget" ],
  26. [ "Library and Information Services", "Libraries and Information Services" ],
  27. [ "On Street Parking", "On-Street Parking" ]
  28. ]
  29. count = 0
  30. if ARGV[0].nil?
  31. puts "Specify the filename of the CSV file to import on the command line"
  32. exit
  33. end
  34. date_format = ARGV[1].upcase
  35. if date_format != 'DMY' && date_format != 'MDY'
  36. puts "Specify the date format as DMY or MDY as the second argument on the command line"
  37. exit
  38. end
  39. Setting.first_or_create(
  40. :id => 1,
  41. :site_name => 'Cotswold District Council Armchair Auditor',
  42. :site_tagline => 'moinkles',
  43. :site_url => 'http://cotswold.chard.org/',
  44. :org_name => 'Cotswolds',
  45. :org_url => 'http://www.cotswold.gov.uk/',
  46. :data_url => 'http://www.cotswold.gov.uk/nqcontent.cfm?a_id=13293#files'
  47. )
  48. FasterCSV.foreach(ARGV[0]) do |row|
  49. count += 1
  50. if (count > 1) # skip first line that doesn't contain data
  51. p row
  52. if not directorate_column.nil?
  53. directorate_name = row[columns[directorate_column]].strip.gsub(/&/, "and")
  54. directorate = Directorate.first_or_create(:name => directorate_name, :slug => slugify(directorate_name))
  55. unless directorate.save
  56. puts "ERROR: Failed to save directorate"
  57. puts directorate.errors.inspect
  58. end
  59. end
  60. service_name = row[columns[service_name_column]].strip.gsub(/&/, "and")
  61. supplier_name = row[columns[vendor_name_column]].strip.gsub(/&/, "and")
  62. #for replacement in directorate_replacements
  63. #if directorate_name == replacement[0]
  64. #directorate_name = replacement[1]
  65. #end
  66. #end
  67. for replacement in service_replacements
  68. if service_name == replacement[0]
  69. service_name = replacement[1]
  70. end
  71. end
  72. service = Service.first_or_create(:name => service_name, :directorate => directorate, :slug => slugify(service_name))
  73. unless service.save
  74. puts "ERROR: Failed to save service"
  75. puts service.errors.inspect
  76. end
  77. supplier = Supplier.first_or_create(:name => supplier_name, :slug => slugify(supplier_name))
  78. unless supplier.save
  79. puts "ERROR: Failed to save supplier"
  80. puts supplier.errors.inspect
  81. end
  82. if row[columns[date_column]].nil?
  83. if ARGV[2].nil?
  84. puts "ERROR: missing payment dates; specify date on command line"
  85. exit
  86. end
  87. dt = ARGV[2].strip.split('/')
  88. else
  89. dt = row[columns[date_column]].strip.split('/')
  90. end
  91. # Date.new takes YMD params
  92. if date_format == 'DMY'
  93. d = Date.new(dt[2].to_i, dt[1].to_i, dt[0].to_i)
  94. elsif date_format == 'MDY'
  95. d = Date.new(dt[2].to_i, dt[0].to_i, dt[1].to_i)
  96. elsif date_format == 'YMD'
  97. d = Date.new(dt[0].to_i, dt[1].to_i, dt[2].to_i)
  98. end
  99. payment = Payment.first_or_new(
  100. 'service' => service,
  101. 'supplier' => supplier,
  102. 'amount' => row[columns['Amount']].strip.gsub(/,/, ''),
  103. 'd' => d
  104. )
  105. unless payment.save
  106. puts "ERROR: Failed to save payment"
  107. puts payment.errors.inspect
  108. payment.errors.each do |e|
  109. puts e
  110. end
  111. end
  112. else
  113. # Get the column headings
  114. position = 0
  115. # Annoyingly, CDC has changed its column names, and we want to support
  116. # both types of file. Even more annoyingly, directorates aren't
  117. # specified any more.
  118. if row.include? 'Vendor Name'
  119. service_name_column = 'Service Area'
  120. vendor_name_column = 'Vendor Name'
  121. date_column = 'Payment Date'
  122. columns =
  123. {
  124. 'Body name' => nil,
  125. 'Body' => nil,
  126. 'Number' => nil,
  127. 'Invoice Ref.' => nil,
  128. 'Vendor Name' => nil,
  129. 'Expense' => nil,
  130. 'Expense Type' => nil,
  131. 'Cost Centre' => nil,
  132. 'Payment Date' => nil,
  133. 'Amount' => nil,
  134. 'Service Area' => nil,
  135. }
  136. else
  137. directorate_column = 'Service Area Categorisation'
  138. service_name_column = 'Service Division Categorisation'
  139. vendor_name_column = 'Supplier Name'
  140. if row.include? 'Invoice Date'
  141. date_column = 'Invoice Date'
  142. else
  143. date_column = 'Date'
  144. end
  145. columns =
  146. {
  147. 'Body Name' => nil,
  148. 'Body' => nil,
  149. 'Service Area Categorisation' => nil,
  150. 'Service Division Categorisation' => nil,
  151. 'Responsible Unit' => nil,
  152. 'Expenses type' => nil,
  153. 'Detailed expenses type' => nil,
  154. 'Expenses code' => nil,
  155. 'Narrative' => nil,
  156. date_column => nil,
  157. 'Transaction Number' => nil,
  158. 'Amount' => nil,
  159. 'Revenue/Capital' => nil,
  160. 'Supplier Name' => nil,
  161. 'Supplier ID' => nil,
  162. 'Contract ID' => nil,
  163. 'Notes' => nil
  164. }
  165. end
  166. for column in row
  167. columns[column] = position
  168. position += 1
  169. end
  170. puts columns.inspect
  171. end
  172. end