|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422 |
- <?php
-
- //Includes
- require_once('config.php');
- require_once('application.php');
- require_once ("PEAR/HTTP/Request.php");
- require_once('phpcoord.php');
-
- //Generic scrapers
- function scrape_applications_publicaccess ($search_url, $info_url_base, $comment_url_base){
-
- $applications = array();
- $application_pattern = "/<tr><th>([0-9]*)<\/th>([^;]*)([^<]*)/";
-
- //grab the page
- $html = safe_scrape_page($search_url);
-
- //clean html
- $html = str_replace("\r\n","", $html);
-
- preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER);
-
- foreach ($application_matches[0] as $application_match){
-
- $detail_pattern = "/<td>([^<])*/";
- preg_match_all($detail_pattern, $application_match, $detail_matches, PREG_PATTERN_ORDER);
-
- $application = new Application();
-
- //match the basic details
- $application->council_reference = str_replace("<td>", "", $detail_matches[0][0]);
- $application->date_received = str_replace("<td>", "", $detail_matches[0][1]);
- $application->address = str_replace("<td>", "", $detail_matches[0][2]);
- //$application->status = str_replace("<td>", "", $detail_matches[0][4]);
-
- //match case number
- $casenumber_pattern = "/caseno=([^&]*)/";
- preg_match($casenumber_pattern, $application_match, $casenumber_matches);
-
- $case_number ="";
- if(sizeof($casenumber_matches)>0){
- $case_number = str_replace("caseno=","", $casenumber_matches[0]);
- }
-
- //if weve found a caase number, then get the details
- if($case_number !=""){
- //Comment and info urls
- $application->info_url = $info_url_base . $case_number;
- $application->comment_url = $comment_url_base . $case_number;
-
- //Get the postcode
- $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
-
- preg_match($postcode_pattern, $application->address, $postcode_matches);
- if(isset($postcode_matches[0])){
- $application->postcode = $postcode_matches[0];
- }
-
- //get full details
- $details_html = "";
- $details_html = safe_scrape_page($info_url_base . $case_number);
-
- //regular expresion and clean
- $full_detail_pattern = '/id="desc" rows="[1-9]" cols="80" class="cDetailInput">([^<]*)/';
- preg_match($full_detail_pattern, $details_html, $full_detail_matches);
- if (isset($full_detail_matches[0])){
- $application->description = substr($full_detail_matches[0], strpos($full_detail_matches[0], ">") + 1);
- }
-
- //only add it if we have a postcode (bit useless otherwise)
- if(is_postcode($application->postcode)){
- array_push($applications, $application);
- }
- }else{
- error_log("Unable to find case number for an application at " . $search_url);
- }
- }
-
- //return
- return $applications;
- }
-
- function scrape_applications_wam ($search_url, $info_url_base, $comment_url_base, $detail_mode = 1){
-
- $applications = array();
- $application_pattern = '/<tr><td class=[^>]*>([^<]*)<\/td><td class=[^>]*><a href="[^"]*">([^<]*)<\/a><\/td><td class=[^>]*>([^<]*)<\/td><td class=[^>]*>([^<]*)<\/td>/';
-
-
- //grab the page
- $html = safe_scrape_page($search_url);
-
- //clean html
- $html = str_replace("\r\n","", $html);
-
- preg_match_all($application_pattern, $html, $application_matches, PREG_SET_ORDER);
-
- foreach ($application_matches as $application_match){
- if ($application_match[4] != 'Current') { continue; }
-
- $application = new Application();
-
- //match the basic details
- $application->council_reference = $application_match[2];
- $case_number = $application_match[2];
- $application->date_received = $application_match[1];
- $application->address = $application_match[3];
- //$application->status = $application_match[4];
-
- //if weve found a caase number, then get the details
- if($case_number !=""){
- //Comment and info urls
- $application->info_url = $info_url_base . $case_number;
- $application->comment_url = $comment_url_base . $case_number;
-
- //Get the postcode
- $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
-
- preg_match($postcode_pattern, $application->address, $postcode_matches);
- if(isset($postcode_matches[0])){
- $application->postcode = $postcode_matches[0];
- }
-
- //get full details
- $details_html = "";
- $details_html = safe_scrape_page($info_url_base . $case_number);
- $details_html = str_replace("\r\n","",$details_html);
-
- //regular expresion and clean. SItes vary a tiny bit in their html, so there's a bit of a hack here
- if ($detail_mode == 1){
- $full_detail_pattern = '/Development:<.*<td colspan="3">([^<]*)<\/td>/';
- }
- if ($detail_mode == 2){
- $full_detail_pattern = '/Development:<\/td><td>([^<]*)/';
- }
-
- preg_match($full_detail_pattern, $details_html, $full_detail_matches);
-
- if (isset($full_detail_matches[1])){
- $application->description = $full_detail_matches[1];
- }
-
- //only add it if we have a postcode (bit useless otherwise)
- if(is_postcode($application->postcode)){
-
- //removed the xy for the moment. It is slowing down the scrape and will be added when the app is parsed anyway (Richard)
- /* $xy = postcode_to_location($application->postcode);
- $application->x = $xy[0];
- $application->y = $xy[1];
- $os = new OSRef($xy[0],$xy[1]);
- $latlon = $os->toLatLng();
- $application->lat = $latlon->lat;
- $application->lon = $latlon->lng;
- */
- array_push($applications, $application);
- }
- }else{
- error_log("Unable to find case number for an application at " . $search_url);
- }
-
- }
-
- //return
- return $applications;
- }
-
- // Council specific scapers
- function scrape_applications_islington ($search_url, $info_url_base, $comment_url_base){
-
- $applications = array();
- $application_pattern = '/<TR>([^<]*)<TD class="lg" valign="top" >([^<]*)<a href([^<]*)<a href=wphappcriteria.display>Search Criteria(.*)([^<]*)<(.*)>([^<]*)<TD class="lg" >([^<]*)<\/TD>([^<]*)<TD class="lg" >([^<]*)<INPUT TYPE=HIDDEN NAME([^>]*)([^<]*)/';
-
- //grab the page
- $html = safe_scrape_page($search_url);
-
- preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER);
-
- foreach ($application_matches[0] as $application_match){
-
- $application_string = str_replace("\n","", $application_match);
-
- $reference_pattern = '/Search Results<\/a>">([^<]*)/';
- preg_match_all($reference_pattern, $application_string, $reference_matches, PREG_PATTERN_ORDER);
-
- $application = new Application();
-
- //match the applicaiton number
- $application->council_reference = str_replace('Search Results</a>">', "", $reference_matches[0][0]);
-
- //Comment and info urls
- $application->info_url = $info_url_base . $application->council_reference;
- $application->comment_url = $comment_url_base . $application->council_reference;
-
- //get full details
- $details_html = "";
- $details_html = safe_scrape_page($info_url_base . $application->council_reference);
- $details_html = str_replace("\r\n","",$details_html);
-
- //Details
- $full_detail_pattern = '/Proposal:<\/label><\/td>([^<]*)<td colspan="3">([^<]*)/';
-
- preg_match($full_detail_pattern, $details_html, $full_detail_matches);
- if (isset($full_detail_matches[2])){
- $application->description = $full_detail_matches[2];
- }
-
- //Address
- $address_pattern = '/Main location:<\/label><\/td>([^<]*)<td colspan="3">([^<]*)/';
- $address = "";
- preg_match($address_pattern, $details_html, $address_matches);
- if(isset($address_matches[2])){
- $application->address = $address_matches[2];
- }
-
- //postcode
- $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
- preg_match($postcode_pattern, $application->address, $postcode_matches);
- if(isset($postcode_matches[0])){
- $application->postcode = $postcode_matches[0];
- }
-
- //only add it if we have a postcode (bit useless otherwise)
- if(is_postcode($application->postcode)){
- array_push($applications, $application);
- }
- }
-
- //return
- return $applications;
- }
- //validate postcode
- function is_postcode ($postcode){
- $valid = false;
- $postcode=str_replace(" ","",$postcode);
- if(ereg ('^[a-zA-Z]{1,2}[0-9]{1,2}[a-zA-Z]{0,1}[0-9]{1}[a-zA-Z]{2}$', $postcode)){
- $valid = true;
- }
- return $valid;
- }
-
- //Tiny url
- function tiny_url($url,$length=30){
-
- // make nasty big url all small
- if (strlen($url) >= $length){
- $tinyurl = @file ("http://tinyurl.com/api-create.php?url=$url");
-
- if (is_array($tinyurl)){
- $tinyurl = join ('', $tinyurl);
- } else {
- $tinyurl = $url;
- }
- } else {
- $tinyurl = $url;
- }
-
- return $tinyurl;
- }
-
- //Google maps url
- function googlemap_url_from_postcode($postcode, $zoom = 15){
- $postcode = strtolower(str_replace(" ", "+", $postcode));
- return "http://maps.google.com/maps?q=$postcode&z=$zoom";
- }
-
- //postcode to location
- function postcode_to_location($postcode){
-
- $x = 0;
- $y = 0;
-
- $clean_postcode = strtolower($postcode);
- $clean_postcode = str_replace(" ","+", $clean_postcode);
- $url = "http://www.streetmap.co.uk/newsearch.srf?type=Postcode&name=" . $clean_postcode;
-
- $html = file_get_contents($url);
- $x_pattern = "/var _LocationX=\d*;/";
- $y_pattern = "/var _LocationY=\d*;/";
-
- //X
- preg_match($x_pattern, $html, $matches);
- if(sizeof($matches) >0){
- $x = $matches[0];
- $x = str_replace('var _LocationX=',"", $x);
- $x = str_replace(";","", $x);
- }
-
- //Y
- preg_match($y_pattern, $html, $matches);
- if(sizeof($matches) >0){
- $y = str_replace("var _LocationY=","", $matches[0]);
- $y = str_replace(";","", $y);
- }
-
- $return = array();
-
- $return[0] = $x;
- $return[1] = $y;
- return $return;
- }
-
-
-
- function valid_email ($string) {
- $valid = false;
- if (!ereg('^[-!#$%&\'*+\\./0-9=?A-Z^_`a-z{|}~]+'.
- '@'.
- '[-!#$%&\'*+\\/0-9=?A-Z^_`a-z{|}~]+\.'.
- '[-!#$%&\'*+\\./0-9=?A-Z^_`a-z{|}~]+$', $string)) {
- $valid = false;
- } else {
- $valid = true;
- }
-
- return $valid;
- }
-
- function alert_size_to_meters($alert_area_size){
-
- $area_size_meters = 0;
- if ($alert_area_size == "s"){
- $area_size_meters = SMALL_ZONE_SIZE;
- }elseif ($alert_area_size == "m"){
- $area_size_meters = MEDIUM_ZONE_SIZE;
- }elseif ($alert_area_size == "l"){
- $area_size_meters = LARGE_ZONE_SIZE;
- }
- return $area_size_meters;
- }
-
- //Send a text email
- function send_text_email($to, $from_name, $from_email, $subject, $body){
-
- $headers = 'MIME-Version: 1.0' . "\r\n";
- $headers .= 'Content-type: text/plain; charset=iso-8859-1' . "\r\n";
- $headers .= 'From: ' . $from_name. ' <' . $from_email . ">\r\n";
-
- mail($to, $subject, $body, $headers);
-
- }
-
- // Format a date to mysql format
- function mysql_date($date){
- return date("Y-m-d H::i:s", $date);
- }
-
- function safe_scrape_page($url, $method = "GET"){
- error_log(print_r($url, true));
- $page = "";
- for ($i=0; $i < 3; $i++){
- if($page == false){
- if (SCRAPE_METHOD == "PEAR"){
- $page = scrape_page_pear($url, $method);
- }else{
- $page = scrape_page_curl($url, $method);
- }
- }
- }
- return $page;
- }
-
- function scrape_page_pear($url, $method = "GET"){
- $page = "";
- $request = new HTTP_Request($url, array("method" => $method));
- $request->sendRequest();
- $page = $request->getResponseBody();
-
- return $page;
-
- }
-
- function scrape_page_curl($url) {
- $ch = curl_init($url);
- curl_setopt($ch,CURLOPT_RETURNTRANSFER,TRUE);
- curl_setopt($ch,CURLOPT_FOLLOWLOCATION,TRUE);
- return curl_exec($ch);
- }
-
- function display_applications($applications, $authority_name, $authority_short_name){
- //smarty
- $smarty = new Smarty;
- $smarty->force_compile = true;
- $smarty->compile_dir = SMARTY_COMPILE_DIRECTORY;
- $smarty->template_dir = "../templates";
- $smarty->assign("authority_name", $authority_name);
- $smarty->assign("authority_short_name", $authority_short_name);
-
- if (sizeof($applications) > 0){
- $smarty->assign("applications", $applications);
- }
-
- $smarty->display("xml.tpl");
- }
-
- function get_time_from_get(){
-
- //if any get params were passed, overwrite the default date
- if (isset($_GET['day'])){
- $day = $_GET['day'];
- }else{
- throw_error("No day set in get string");
- }
- if (isset($_GET['month'])){
- $month = $_GET['month'];
- }else{
- throw_error("No year set in get string");
- }
-
- if (isset($_GET['year'])){
- $year = $_GET['year'];
- }else{
- throw_error("No year set in get string");
- }
-
- return mktime(0,0,0,$month,$day,$year);
-
- }
-
- function throw_error($message){
- throw new exception($message);
- }
-
- ?>
|