Automatically exported from code.google.com/p/planningalerts
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.
 
 
 
 
 
 

198 řádky
7.1 KiB

  1. <?php
  2. require_once('tools_ini.php');
  3. require_once('application.php');
  4. require_once('DB.php');
  5. $swiches = getopt('d:');
  6. $day = isset($swiches['d']) ? $swiches['d'] : null;
  7. //Initialise
  8. $application_parser = new application_parser();
  9. if(isset($day)){
  10. $application_parser->date = getdate(strtotime("-" . $day . " days"));
  11. $application_parser->run();
  12. }else{
  13. //Scrape for the last X days (apps already in the database are ignored)
  14. for ($i=0; $i < SCRAPE_DELAY; $i++){
  15. $application_parser->date = getdate(strtotime("-" . $i . " days"));
  16. $application_parser->run();
  17. }
  18. }
  19. //Send email
  20. $application_parser->email_log();
  21. //Parser class
  22. class application_parser{
  23. //Properties
  24. var $date;
  25. var $log = array();
  26. var $sleep_interval = 2; //how long to wait between scraping each feed
  27. //Constructor
  28. function application_parser (){
  29. //set default date
  30. $this->date = getdate();
  31. }
  32. //Run
  33. function run(){
  34. $db = DB::connect(DB_CONNECTION_STRING);
  35. $sql = "Select authority_id, feed_url, external, disabled, short_name from authority where disabled <> 1";
  36. $results = $db->getAll($sql);
  37. if (sizeof($results) == 0){
  38. //throw new exception("You need to put some authorities to scrape in the database");
  39. }
  40. //log
  41. $this->store_log("Scraping " . sizeof($results) . "authorities");
  42. //Parse & save each feed
  43. foreach($results as $result){
  44. //reset the timeout
  45. set_time_limit(0);
  46. $authority_id = $result[0];
  47. $external = $result[2];
  48. $disabled = $result[3];
  49. if($external != true){
  50. $feed_url = BASE_URL . $feed_url = $result[1];
  51. }else{
  52. $feed_url = $result[1];
  53. }
  54. //replace date wild cards
  55. $feed_url = str_replace("{day}",$this->date['mday'], $feed_url);
  56. $feed_url = str_replace("{month}",$this->date['mon'], $feed_url);
  57. $feed_url = str_replace("{year}",$this->date['year'], $feed_url);
  58. //log
  59. $this->store_log("Scraping authority " . $result[4] . " from " . $feed_url);
  60. //if it isnt disabled parse it
  61. if ($disabled == false){
  62. $applications = $this->parse_applications($feed_url, $authority_id);
  63. //log
  64. $this->store_log("Found " . sizeof($applications) . " applications for " . $result[4]);
  65. //save applications (probably shouldent be saved individually, but sod it for the moment)
  66. foreach ($applications as $application){
  67. if(!$application->exists()){
  68. $application->save();
  69. $this->store_log("Saving application" . $application->council_reference);
  70. }else{
  71. $this->store_log("Application already exists in database" . $application->council_reference);
  72. }
  73. }
  74. }
  75. //wait for a bit so we dont blow anyone's server (mainly tinyurl)
  76. sleep($this->sleep_interval);
  77. }
  78. }
  79. //Turn xml into application objects
  80. function parse_applications($feed_url, $authority_id){
  81. $return_applications = array();
  82. //reset warnings
  83. //Grab the XML
  84. $xml = "";
  85. try{
  86. $xml = safe_scrape_page($feed_url);
  87. }catch (exception $e){
  88. array_push($this->log, "ERROR: problem occured when grabbing feed: " . $feed_url . " ---->>>" . $e);
  89. }
  90. if ($xml == false){
  91. $this->store_log("ERROR: empty feed feed: " . $feed_url);
  92. }
  93. //Turn the xml into an object
  94. $parsed_applications = simplexml_load_string($xml);
  95. //Loop through the applications, add tinyurl / google maps etc and add to array
  96. if(sizeof($parsed_applications) >0){
  97. foreach($parsed_applications->applications->application as $parsed_application){
  98. $application = new application();
  99. //Grab basic data from the xml
  100. $application->authority_id = $authority_id;
  101. $application->council_reference = $parsed_application->council_reference;
  102. $date_received_dmy = split("/", $parsed_application->date_received);
  103. if (count($date_received_dmy) == 3){
  104. $application->date_received = "$date_received_dmy[2]-$date_received_dmy[1]-$date_received_dmy[0]";
  105. } else {
  106. // Make a best effort attempt to parse the date
  107. $ts = strtotime($parsed_application->date_received);
  108. if ($ts != FALSE && $ts != -1) {
  109. $application->date_received = date("Y-m-d", $ts);
  110. }
  111. }
  112. $application->address = $parsed_application->address;
  113. $application->postcode = $parsed_application->postcode;
  114. $application->description = $parsed_application->description;
  115. $application->info_url = $parsed_application->info_url;
  116. $application->comment_url = $parsed_application->comment_url;
  117. $application->date_scraped = mysql_date(time());
  118. //Make the urls
  119. $info_tiny_url = tiny_url($application->info_url);
  120. if ($info_tiny_url == ""){
  121. $this->store_log("ERROR: Created blank info tiny url");
  122. }
  123. $comment_tiny_url = tiny_url($application->comment_url);
  124. if ($comment_tiny_url == ""){
  125. $this->store_log("ERROR: Created blank comment tiny url");
  126. }
  127. $application->info_tinyurl =$info_tiny_url;
  128. $application->comment_tinyurl = $comment_tiny_url;
  129. $application->map_url = googlemap_url_from_postcode($application->postcode);
  130. //Workout the XY location from postcode
  131. $xy = postcode_to_location($application->postcode);
  132. $application->x = $xy[0];
  133. $application->y = $xy[1];
  134. //Add to array
  135. array_push($return_applications, $application);
  136. }
  137. }
  138. return $return_applications;
  139. }
  140. function store_log($text){
  141. array_push($this->log, $text);
  142. print $text . "\n\n";
  143. }
  144. function email_log(){
  145. //Email log
  146. send_text_email(LOG_EMAIL, "parser@" . DOMAIN, "parser@" . DOMAIN, "Planning parser log", print_r($this->log, true));
  147. $this->store_log("Debug email sent to " . LOG_EMAIL);
  148. }
  149. }
  150. ?>