Automatically exported from code.google.com/p/planningalerts
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 
 
 
 

130 rader
4.5 KiB

  1. #!/usr/bin/env python
  2. list_of_sites_filename = "SitesToGenerate.csv"
  3. other_files_to_copy_filename = "OtherFilesToCopy.csv"
  4. other_files_location = "python_scrapers/"
  5. template_filename = "python_scrapers/CGITemplate.py"
  6. cgi_dir = "cgi-bin/"
  7. import csv
  8. import urllib
  9. from os import chmod, environ
  10. from shutil import copyfile
  11. import MySQLdb
  12. # First, copy across files that are needed in the CGI directory
  13. # that aren't generated.
  14. other_files_to_copy = open(other_files_to_copy_filename)
  15. other_files_csv_reader = csv.DictReader(
  16. other_files_to_copy,
  17. quoting=csv.QUOTE_ALL,
  18. skipinitialspace=True,
  19. )
  20. for file_dict in other_files_csv_reader:
  21. filename = file_dict["filename"]
  22. copyfile(other_files_location + filename, cgi_dir+filename)
  23. # the idea here is to have filename and permissions
  24. # in the csv file.
  25. # Until version 2.6 of python, there is no easy way
  26. # to convert a string to an octal, so I am using
  27. # integers to represent permissions...
  28. # see README for details.
  29. chmod(cgi_dir+filename, int(file_dict["permissions"]))
  30. # Next we generate the cgi files
  31. list_of_sites_file = open(list_of_sites_filename)
  32. csv_reader = csv.DictReader(
  33. list_of_sites_file,
  34. quoting=csv.QUOTE_ALL,
  35. skipinitialspace=True,
  36. )
  37. # create cgi files and write them in the cgi directory
  38. template= open(template_filename).read()
  39. # Get a mysql cursor
  40. mysql_connection = MySQLdb.connect(
  41. db=environ['MYSQL_DB_NAME'],
  42. user=environ['MYSQL_USERNAME'],
  43. passwd=environ['MYSQL_PASSWORD'],
  44. )
  45. mysql_cursor = mysql_connection.cursor()
  46. python_scraper_location = "/cgi-bin/%s.cgi?day={day}&month={month}&year={year}"
  47. php_scraper_location = "/scrapers/%(php_scraper)s.php?day={day}&month={month}&year={year}"
  48. # All of this should probably be done with SqlAlchemy or something.
  49. authority_select_query = "SELECT * FROM authority WHERE short_name = '%(short_name)s';"
  50. # FIXME: Both of these queries should set planning_email and notes.
  51. authority_insert_query = 'INSERT INTO authority (full_name, short_name, feed_url, external, disabled, planning_email) values ("%(full_name)s", "%(short_name)s", "%(feed_url)s", %(external)s, %(disabled)s, "%(planning_email)s");'
  52. authority_update_query = 'UPDATE authority SET full_name="%(full_name)s", external="%(external)s", disabled=%(disabled)s, feed_url="%(feed_url)s", external=%(external)s WHERE short_name = "%(short_name)s";'
  53. for site_dict in csv_reader:
  54. # We need these to be 1 or 0 to pass them into mysql.
  55. site_dict['external'] = 1 if site_dict['external'] else 0
  56. site_dict['disabled'] = 1 if site_dict['disabled'] else 0
  57. if site_dict['external']:
  58. # This scraper is somewhere else.
  59. pass
  60. elif site_dict['feed_url']:
  61. # This scraper is local and uses an non-generated file in cgi-bin
  62. pass
  63. elif site_dict['php_scraper']:
  64. # Uses a PHP scraper.
  65. site_dict['feed_url'] = php_scraper_location %site_dict
  66. elif site_dict['python_module'] and site_dict['parser_class']:
  67. # We need to generate a python CGI file
  68. file_location = cgi_dir + "%(short_name)s.cgi" %site_dict
  69. contents = template %site_dict
  70. this_file = open(file_location, "w")
  71. this_file.write(contents)
  72. this_file.close()
  73. chmod(file_location, 0755)
  74. quoted_short_name = urllib.quote(site_dict["short_name"])
  75. site_dict['feed_url'] = python_scraper_location %(quoted_short_name)
  76. else:
  77. # Something has gone wrong.
  78. print "ERROR: Config for %(short_name)s is faulty." %site_dict
  79. # print "Disabling this scraper"
  80. # FIXME: Should have a query here to set disabled for this scraper.
  81. continue
  82. # Do we have a record for this authority already?
  83. row_count = mysql_cursor.execute(authority_select_query %site_dict)
  84. if row_count > 1:
  85. print "ERROR: There is more than one row for %(short_name)s." %site_dict
  86. print "Skipping this scraper."
  87. continue
  88. elif row_count == 1:
  89. mysql_cursor.execute(authority_update_query %site_dict)
  90. elif row_count == 0:
  91. mysql_cursor.execute(authority_insert_query %site_dict)
  92. else:
  93. print "ERROR: How on earth did we get here? Row count is %s" %(row_count)
  94. # write a README to warn people not to svn add stuff to CGI directory
  95. readme_message = """
  96. WARNING - this directory is only for generated files
  97. and files which are automatically copied in.
  98. Anything manually added here will be lost.
  99. """
  100. readme_file = open(cgi_dir + "README", "w")
  101. readme_file.write(readme_message)
  102. readme_file.close()