Automatically exported from code.google.com/p/planningalerts
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

application_parser.php 6.2 KiB

hace 18 años
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. <?php
  2. require_once('tools_ini.php');
  3. require_once('application.php');
  4. require_once('DB.php');
  // Bootstrap: build a parser, point it at the day SCRAPE_DELAY days before
  // now (the date parts fill the {day}/{month}/{year} wildcards in feed urls),
  // then scrape every enabled authority.
  $scrape_timestamp = strtotime("-" . SCRAPE_DELAY . " days");
  $application_parser = new application_parser();
  $application_parser->date = getdate($scrape_timestamp);
  $application_parser->run();
  9. //class
  /**
   * Scrapes the planning application feed of every enabled authority, turns
   * each feed entry into an application object and saves the ones that are
   * not in the database yet. Everything that happens is collected in $log,
   * echoed to the output and finally emailed to LOG_EMAIL.
   */
  class application_parser
  {
      /** @var array Date parts as returned by getdate(); used to fill the feed url wildcards. */
      public $date;

      /** @var array Log messages collected during the run, in order. */
      public $log = array();

      /** @var int Seconds to sleep() after each feed so remote servers are not hammered. */
      public $sleep_interval = 5;

      /**
       * Defaults the scrape date to "now".
       *
       * Uses __construct rather than a method named after the class: PHP 8 no
       * longer treats the latter as a constructor, which would leave $date unset.
       */
      public function __construct()
      {
          $this->date = getdate();
      }

      /**
       * Scrapes every enabled authority, saves the new applications and emails the log.
       *
       * @return void
       */
      public function run()
      {
          $results = $this->load_authorities();

          $this->store_log("Scraping " . sizeof($results) . " authorities");

          // Parse & save each feed
          foreach ($results as $result) {
              // reset the timeout, a single feed can take a long time
              set_time_limit(0);

              $authority_id = $result[0];
              $external = $result[2];
              $disabled = $result[3];
              $short_name = $result[4];
              $feed_url = $this->build_feed_url($result[1], $external);

              $this->store_log("Scraping authority " . $short_name . " from " . $feed_url);

              // if it isnt disabled parse it
              if ($disabled == false) {
                  $applications = $this->parse_applications($feed_url, $authority_id);
                  $this->store_log("Found " . sizeof($applications) . " applications for " . $short_name);

                  // save applications (one at a time; only those not stored already)
                  foreach ($applications as $application) {
                      if (!$application->exists()) {
                          $application->save();
                          $this->store_log("Saving application " . $application->council_reference);
                      } else {
                          $this->store_log("Application already exists in database " . $application->council_reference);
                      }
                  }
              }

              // wait for a bit so we dont blow anyone's server (mainly tinyurl)
              sleep($this->sleep_interval);
          }

          // Email log
          send_text_email(LOG_EMAIL, "parser@" . DOMAIN, "parser@" . DOMAIN, "Planning parser log", print_r($this->log, true));
          $this->store_log("Debug email sent to " . LOG_EMAIL);
      }

      /**
       * Loads the rows of all authorities that are not disabled.
       *
       * Database failures are logged instead of being iterated as if the
       * DB_Error object were a result set.
       *
       * @return array Rows of (authority_id, feed_url, external, disabled, short_name); empty on failure.
       */
      private function load_authorities()
      {
          $db = DB::connect(DB_CONNECTION_STRING);
          if (DB::isError($db)) {
              $this->store_log("ERROR: could not connect to the database: " . $db->getMessage());
              return array();
          }

          $sql = "Select authority_id, feed_url, external, disabled, short_name from authority where disabled <> 1";
          $results = $db->getAll($sql);
          if (DB::isError($results)) {
              $this->store_log("ERROR: could not load the authorities: " . $results->getMessage());
              return array();
          }

          if (sizeof($results) == 0) {
              $this->store_log("ERROR: You need to put some authorities to scrape in the database");
          }

          return $results;
      }

      /**
       * Builds the url to scrape for one authority.
       *
       * @param string $feed_url Feed url as stored for the authority.
       * @param mixed  $external Truthy when the url is already absolute; otherwise BASE_URL is prepended.
       * @return string Url with the {day}, {month} and {year} wildcards replaced from $this->date.
       */
      private function build_feed_url($feed_url, $external)
      {
          if ($external != true) {
              $feed_url = BASE_URL . $feed_url;
          }

          return str_replace(
              array("{day}", "{month}", "{year}"),
              array($this->date['mday'], $this->date['mon'], $this->date['year']),
              $feed_url
          );
      }

      /**
       * Downloads a feed and turns its xml into application objects.
       *
       * Download and parse problems are logged; in that case an empty array
       * is returned rather than trying to walk a missing document.
       *
       * @param string $feed_url     Url of the xml feed.
       * @param mixed  $authority_id Id stored on every application that is created.
       * @return array Application objects (not yet saved).
       */
      public function parse_applications($feed_url, $authority_id)
      {
          $return_applications = array();

          // Grab the XML
          $xml = "";
          try {
              $xml = safe_scrape_page($feed_url);
          } catch (Exception $e) {
              $this->store_log("ERROR: problem occured when grabbing feed: " . $feed_url . " ---->>>" . $e);
          }

          // Nothing usable came back: log it and stop instead of parsing an empty string
          if (!is_string($xml) || trim($xml) === "") {
              $this->store_log("ERROR: empty feed feed: " . $feed_url);
              return $return_applications;
          }

          // Turn the xml into an object
          $parsed_applications = simplexml_load_string($xml);
          if ($parsed_applications === false) {
              $this->store_log("ERROR: could not parse the xml of feed: " . $feed_url);
              return $return_applications;
          }

          // A well formed feed without any application entries is simply empty
          if (!isset($parsed_applications->applications->application)) {
              return $return_applications;
          }

          // Loop through the applications, add tinyurl / google maps etc and add to array
          foreach ($parsed_applications->applications->application as $parsed_application) {
              $application = new application();

              // Grab basic data from the xml; cast so plain strings are stored
              // rather than SimpleXMLElement nodes tied to the parsed document
              $application->authority_id = $authority_id;
              $application->council_reference = (string) $parsed_application->council_reference;
              $application->date_received = (string) $parsed_application->date_received;
              $application->address = (string) $parsed_application->address;
              $application->postcode = (string) $parsed_application->postcode;
              $application->description = (string) $parsed_application->description;
              $application->info_url = (string) $parsed_application->info_url;
              $application->comment_url = (string) $parsed_application->comment_url;
              $application->date_scraped = mysql_date(time());

              // Make the urls
              $info_tiny_url = tiny_url($application->info_url);
              if ($info_tiny_url == "") {
                  $this->store_log("ERROR: Created blank info tiny url");
              }
              $comment_tiny_url = tiny_url($application->comment_url);
              if ($comment_tiny_url == "") {
                  $this->store_log("ERROR: Created blank comment tiny url");
              }
              $application->info_tinyurl = $info_tiny_url;
              $application->comment_tinyurl = $comment_tiny_url;
              $application->map_url = googlemap_url_from_postcode($application->postcode);

              // Workout the XY location from postcode
              $xy = postcode_to_location($application->postcode);
              $application->x = $xy[0];
              $application->y = $xy[1];

              // Add to array
              $return_applications[] = $application;
          }

          return $return_applications;
      }

      /**
       * Appends a message to the log and prints it followed by a blank line.
       *
       * @param string $text Message to record.
       * @return void
       */
      public function store_log($text)
      {
          $this->log[] = $text;
          print $text . "\n\n";
      }
  }
  127. ?>