Automatically exported from code.google.com/p/planningalerts
#!/usr/bin/env python
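"""Generate the CGI scraper files for PlanningAlerts.

This script copies supporting files into the CGI directory, generates a
CGI file from CGITemplate.py for each Python scraper listed in
SitesToGenerate.csv, and inserts or updates the matching row in the
MySQL authority table.
"""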

import csv
import urllib
from os import chmod, environ
from shutil import copyfile

import MySQLdb

list_of_sites_filename = "SitesToGenerate.csv"
other_files_to_copy_filename = "OtherFilesToCopy.csv"
other_files_location = "python_scrapers/"
template_filename = "python_scrapers/CGITemplate.py"
cgi_dir = "cgi-bin/"

# First, copy across files that are needed in the CGI directory
# and aren't generated.
other_files_to_copy = open(other_files_to_copy_filename)
other_files_csv_reader = csv.DictReader(
    other_files_to_copy,
    quoting=csv.QUOTE_ALL,
    skipinitialspace=True,
)
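# Each row of OtherFilesToCopy.csv is expected to supply a "filename"
# and a "permissions" column, which are used below.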
for file_dict in other_files_csv_reader:
    filename = file_dict["filename"]
    copyfile(other_files_location + filename, cgi_dir + filename)
    # The idea here is to have filename and permissions
    # in the CSV file.
    # Until version 2.6 of Python, there is no easy way
    # to convert a string to an octal, so I am using
    # decimal integers to represent permissions...
    # see README for details.
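    # For example, a permissions value of 493 (decimal) gives
    # mode 0755 (octal), i.e. rwxr-xr-x.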
    chmod(cgi_dir + filename, int(file_dict["permissions"]))

# Next we generate the CGI files.
list_of_sites_file = open(list_of_sites_filename)
csv_reader = csv.DictReader(
    list_of_sites_file,
    quoting=csv.QUOTE_ALL,
    skipinitialspace=True,
)
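# Each row of SitesToGenerate.csv is expected to supply the columns
# used below: short_name, full_name, feed_url, external, disabled,
# php_scraper, python_module and parser_class.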

# Create the CGI files and write them into the CGI directory.
template = open(template_filename).read()

# Get a MySQL cursor.
mysql_connection = MySQLdb.connect(
    db=environ['MYSQL_DB_NAME'],
    user=environ['MYSQL_USERNAME'],
    passwd=environ['MYSQL_PASSWORD'],
)
mysql_cursor = mysql_connection.cursor()

python_scraper_location = "/cgi-bin/%s.cgi?day={day}&month={month}&year={year}"
php_scraper_location = "/scrapers/%(php_scraper)s.php?day={day}&month={month}&year={year}"

# All of this should probably be done with SQLAlchemy or something.
authority_select_query = "SELECT * FROM authority WHERE short_name = '%(short_name)s';"
# FIXME: Both of these queries should set planning_email and notes.
authority_insert_query = 'INSERT INTO authority (full_name, short_name, feed_url, external, disabled) VALUES ("%(full_name)s", "%(short_name)s", "%(feed_url)s", %(external)s, %(disabled)s);'
authority_update_query = 'UPDATE authority SET full_name="%(full_name)s", feed_url="%(feed_url)s", external=%(external)s, disabled=%(disabled)s WHERE short_name = "%(short_name)s";'
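# Note that these queries are filled in with Python %-interpolation
# rather than MySQLdb's parameterized execute(query, args), so the CSV
# values are assumed to be trusted and free of quote characters.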

for site_dict in csv_reader:
    # We need these to be 1 or 0 to pass them into MySQL.
    site_dict['external'] = 1 if site_dict['external'] else 0
    site_dict['disabled'] = 1 if site_dict['disabled'] else 0

    if site_dict['external']:
        # This scraper is somewhere else.
        pass
    elif site_dict['feed_url']:
        # This scraper is local and uses a non-generated file in cgi-bin.
        pass
    elif site_dict['php_scraper']:
        # Uses a PHP scraper.
        site_dict['feed_url'] = php_scraper_location % site_dict
    elif site_dict['python_module'] and site_dict['parser_class']:
        # We need to generate a Python CGI file from the template.
        file_location = cgi_dir + "%(short_name)s.cgi" % site_dict
        contents = template % site_dict
        this_file = open(file_location, "w")
        this_file.write(contents)
        this_file.close()
        chmod(file_location, 0755)
        quoted_short_name = urllib.quote(site_dict["short_name"])
        site_dict['feed_url'] = python_scraper_location % quoted_short_name
    else:
        # Something has gone wrong.
        print "ERROR: Config for %(short_name)s is faulty." % site_dict
        # print "Disabling this scraper"
        # FIXME: Should have a query here to set disabled for this scraper.
        continue

    # Do we have a record for this authority already?
    row_count = mysql_cursor.execute(authority_select_query % site_dict)
    if row_count > 1:
        print "ERROR: There is more than one row for %(short_name)s." % site_dict
        print "Skipping this scraper."
        continue
    elif row_count == 1:
        mysql_cursor.execute(authority_update_query % site_dict)
    elif row_count == 0:
        mysql_cursor.execute(authority_insert_query % site_dict)
    else:
        print "ERROR: How on earth did we get here? Row count is %s" % row_count
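
# MySQLdb turns off autocommit by default, so commit explicitly or the
# INSERTs and UPDATEs above may be discarded when the connection closes.
mysql_connection.commit()
mysql_connection.close()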

# Write a README to warn people not to svn add stuff to the CGI directory.
readme_message = """
WARNING - this directory is only for generated files
and files which are automatically copied in.
Anything manually added here will be lost.
"""
readme_file = open(cgi_dir + "README", "w")
readme_file.write(readme_message)
readme_file.close()