Automatically exported from code.google.com/p/planningalerts
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

18 лет назад
18 лет назад
18 лет назад
18 лет назад
18 лет назад
18 лет назад
18 лет назад
18 лет назад
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. <?php
  2. require_once('tools_ini.php');
  3. require_once('application.php');
  4. require_once('DB.php');
  // Command line: an optional "-d <n>" restricts the run to the single day
  // that lies n days in the past. Without it, the last SCRAPE_DELAY days are
  // scraped (today first).
  $switches = getopt('d:');
  $day = isset($switches['d']) ? $switches['d'] : null;

  // getopt() hands back an array when -d is repeated and passes arbitrary text
  // through untouched. Anything that is not a plain non-negative integer would
  // make strtotime() fail below and the scrape silently target the wrong date,
  // so reject it up front.
  if ($day !== null && !(is_string($day) && preg_match('/^[0-9]+$/', $day))) {
      print "ERROR: -d expects a non-negative whole number of days\n";
      exit(1);
  }

  //Initialise
  $application_parser = new application_parser();

  if ($day !== null) {
      // Scrape one specific day only.
      $application_parser->date = getdate(strtotime("-" . $day . " days"));
      $application_parser->run();
  } else {
      //Scrape for the last X days (apps already in the database are ignored)
      for ($i = 0; $i < SCRAPE_DELAY; $i++) {
          $application_parser->date = getdate(strtotime("-" . $i . " days"));
          $application_parser->run();
      }
  }

  //Send email
  $application_parser->email_log();
  21. //Parser class
  /**
   * Scrapes the planning application feed of every enabled authority for one
   * day, turns each feed entry into an application object and saves the ones
   * that are not stored yet. Everything that happens is collected in $log
   * (and printed), and can be mailed with email_log().
   */
  class application_parser
  {
      /** Day to scrape, in the array format returned by getdate(). */
      public $date;

      /** Messages recorded by store_log(), in order. */
      public $log = array();

      /** Seconds to wait between scraping each feed. */
      public $sleep_interval = 2;

      /**
       * Defaults the scrape date to "now".
       *
       * Declared as __construct: a method named after the class is no longer
       * treated as a constructor by PHP 8, which left $date unset.
       */
      public function __construct()
      {
          $this->date = getdate();
      }

      /**
       * Scrapes every authority that is not disabled for the day in $this->date.
       *
       * Database failures are logged and end the run instead of causing a
       * fatal error on the returned error object.
       */
      public function run()
      {
          $db = DB::connect(DB_CONNECTION_STRING);
          if (DB::isError($db)) {
              $this->store_log("ERROR: could not connect to database: " . $db->getMessage());
              return;
          }

          $sql = "Select authority_id, feed_url, external, disabled, short_name from authority where disabled <> 1";
          $results = $db->getAll($sql);
          if (DB::isError($results)) {
              $this->store_log("ERROR: could not load authorities: " . $results->getMessage());
              return;
          }

          //log
          $this->store_log("Scraping " . sizeof($results) . " authorities");

          //Parse & save each feed
          foreach ($results as $result) {
              //reset the timeout
              set_time_limit(0);

              // Columns follow the order of the select list above.
              $authority_id = $result[0];
              $feed_url     = $result[1];
              $external     = $result[2];
              $disabled     = $result[3];
              $short_name   = $result[4];

              // Feeds that are not external are addressed relative to BASE_URL.
              if ($external != true) {
                  $feed_url = BASE_URL . $feed_url;
              }

              //replace date wild cards
              $feed_url = str_replace(
                  array("{day}", "{month}", "{year}"),
                  array($this->date['mday'], $this->date['mon'], $this->date['year']),
                  $feed_url
              );

              //log
              $this->store_log("Scraping authority " . $short_name . " from " . $feed_url);

              //if it isnt disabled parse it
              if ($disabled == false) {
                  $applications = $this->parse_applications($feed_url, $authority_id);

                  //log
                  $this->store_log("Found " . sizeof($applications) . " applications for " . $short_name);

                  //save applications (probably shouldn't be saved individually, but fine for the moment)
                  foreach ($applications as $application) {
                      if (!$application->exists()) {
                          $application->save();
                          $this->store_log("Saving application " . $application->council_reference);
                      } else {
                          $this->store_log("Application already exists in database " . $application->council_reference);
                      }
                  }
              }

              //wait for a bit so we dont blow anyone's server (mainly tinyurl)
              sleep($this->sleep_interval);
          }
      }

      /**
       * Downloads one feed and turns its XML into application objects.
       *
       * @param string $feed_url     URL of the feed to fetch
       * @param mixed  $authority_id id stored on every application created
       * @return array application objects; empty when the feed could not be
       *               fetched, is empty, or is not parseable XML
       */
      public function parse_applications($feed_url, $authority_id)
      {
          $return_applications = array();

          //Grab the XML
          $xml = "";
          try {
              $xml = safe_scrape_page($feed_url);
          } catch (Exception $e) {
              $this->store_log("ERROR: problem occured when grabbing feed: " . $feed_url . " ---->>>" . $e);
          }

          // Nothing to parse: stop here rather than feeding an empty value to
          // the XML parser and counting its failure result.
          if ($xml == false) {
              $this->store_log("ERROR: empty feed feed: " . $feed_url);
              return $return_applications;
          }

          //Turn the xml into an object
          $parsed_applications = simplexml_load_string($xml);
          if ($parsed_applications === false) {
              $this->store_log("ERROR: could not parse feed as XML: " . $feed_url);
              return $return_applications;
          }
          if (!isset($parsed_applications->applications->application)) {
              // Well-formed document without any application entries.
              return $return_applications;
          }

          //Loop through the applications, add tinyurl / google maps etc and add to array
          foreach ($parsed_applications->applications->application as $parsed_application) {
              $application = new application();

              //Grab basic data from the xml (cast so plain strings, not XML nodes, are stored)
              $application->authority_id = $authority_id;
              $application->council_reference = (string) $parsed_application->council_reference;

              $date_received = $this->normalise_date_received((string) $parsed_application->date_received);
              if ($date_received !== null) {
                  $application->date_received = $date_received;
              }

              $application->address = (string) $parsed_application->address;
              $application->description = (string) $parsed_application->description;
              $application->info_url = (string) $parsed_application->info_url;
              $application->comment_url = (string) $parsed_application->comment_url;
              $application->date_scraped = mysql_date(time());

              //Make the urls
              $info_tiny_url = tiny_url($application->info_url);
              if ($info_tiny_url == "") {
                  $this->store_log("ERROR: Created blank info tiny url");
              }
              $comment_tiny_url = tiny_url($application->comment_url);
              if ($comment_tiny_url == "") {
                  $this->store_log("ERROR: Created blank comment tiny url");
              }

              if (isset($parsed_application->postcode)) {
                  //Workout the XY location from postcode
                  $postcode = (string) $parsed_application->postcode;
                  $xy = postcode_to_location($postcode);
                  $application->postcode = $postcode;
                  $application->x = $xy[0];
                  $application->y = $xy[1];
              } else if (isset($parsed_application->easting) &&
                         isset($parsed_application->northing)) {
                  // No postcode in the feed: derive one from the coordinates.
                  $easting = (string) $parsed_application->easting;
                  $northing = (string) $parsed_application->northing;
                  $application->postcode = location_to_postcode($easting, $northing);
                  $application->x = $easting;
                  $application->y = $northing;
              }

              $application->info_tinyurl = $info_tiny_url;
              $application->comment_tinyurl = $comment_tiny_url;
              $application->map_url = googlemap_url_from_postcode($application->postcode);

              //Add to array
              $return_applications[] = $application;
          }

          return $return_applications;
      }

      /**
       * Converts a feed date into "Y-m-d" form.
       *
       * A value with exactly three "/"-separated parts is read as
       * day/month/year and reordered; anything else gets a best-effort
       * strtotime() parse.
       *
       * @param string $raw date text from the feed
       * @return string|null the converted date, or null when it cannot be parsed
       */
      private function normalise_date_received($raw)
      {
          // explode() replaces split(), which no longer exists in PHP 7+.
          $parts = explode("/", $raw);
          if (count($parts) == 3) {
              return "$parts[2]-$parts[1]-$parts[0]";
          }

          // Make a best effort attempt to parse the date
          $ts = strtotime($raw);
          if ($ts != false && $ts != -1) {
              return date("Y-m-d", $ts);
          }

          return null;
      }

      /**
       * Appends a message to the log and prints it followed by a blank line.
       *
       * @param string $text message to record
       */
      public function store_log($text)
      {
          $this->log[] = $text;
          print $text . "\n\n";
      }

      /**
       * Mails the collected log to LOG_EMAIL and records that it was sent.
       */
      public function email_log()
      {
          //Email log
          send_text_email(LOG_EMAIL, "parser@" . DOMAIN, "parser@" . DOMAIN, "Planning parser log", print_r($this->log, true));
          $this->store_log("Debug email sent to " . LOG_EMAIL);
      }
  }
  162. ?>