([0-9]*)<\/th>([^;]*)([^<]*)/";
//grab the page
$html = safe_scrape_page($search_url);
//clean html
$html = str_replace("\r\n","", $html);
preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER);
foreach ($application_matches[0] as $application_match){
$detail_pattern = "/ ([^<])*/";
preg_match_all($detail_pattern, $application_match, $detail_matches, PREG_PATTERN_ORDER);
$application = new Application();
//match the basic details
$application->council_reference = str_replace(" ", "", $detail_matches[0][0]);
$application->date_received = str_replace(" ", "", $detail_matches[0][1]);
$application->address = str_replace(" ", "", $detail_matches[0][2]);
//$application->status = str_replace(" ", "", $detail_matches[0][4]);
//match case number
$casenumber_pattern = "/caseno=([^&]*)/";
preg_match($casenumber_pattern, $application_match, $casenumber_matches);
$case_number ="";
if(sizeof($casenumber_matches)>0){
$case_number = str_replace("caseno=","", $casenumber_matches[0]);
}
//if weve found a caase number, then get the details
if($case_number !=""){
//Comment and info urls
$application->info_url = $info_url_base . $case_number;
$application->comment_url = $comment_url_base . $case_number;
//Get the postcode
$postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
preg_match($postcode_pattern, $application->address, $postcode_matches);
if(isset($postcode_matches[0])){
$application->postcode = $postcode_matches[0];
}
//get full details
$details_html = "";
$details_html = safe_scrape_page($info_url_base . $case_number);
//regular expresion and clean
$full_detail_pattern = '/id="desc" rows="[1-9]" cols="80" class="cDetailInput">([^<]*)/';
preg_match($full_detail_pattern, $details_html, $full_detail_matches);
if (isset($full_detail_matches[0])){
$application->description = substr($full_detail_matches[0], strpos($full_detail_matches[0], ">") + 1);
}
//only add it if we have a postcode (bit useless otherwise)
if(is_postcode($application->postcode)){
array_push($applications, $application);
}
}else{
error_log("Unable to find case number for an application at " . $search_url);
}
}
//return
return $applications;
}
/**
 * Scrape planning applications from a WAM-style search results page.
 *
 * Each results row whose status column reads 'Current' becomes an
 * Application. The row's reference is also used as the case number when
 * building the info and comment URLs, and the info page is fetched to pull
 * out the development description. Applications without a valid postcode in
 * their address are left out of the result.
 *
 * @param string $search_url       URL of the search results page
 * @param string $info_url_base    prefix the case number is appended to for the details URL
 * @param string $comment_url_base prefix the case number is appended to for the comment URL
 * @return array list of Application objects
 */
function scrape_applications_wam ($search_url, $info_url_base, $comment_url_base){
    // NOTE(review): the row pattern looks like it may have lost its opening
    // tags at some point - confirm against a known-good copy before relying on it.
    $row_pattern = '/ ]*>([^<]*)<\/td> ]*>([^<]*)<\/a><\/td> ]*>([^<]*)<\/td> ]*>([^<]*)<\/td>/';
    $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
    $description_pattern = '/Development:<.* ([^<]*)<\/td>/';

    $found = array();

    // fetch the results page and drop CRLFs before matching rows
    $results_html = str_replace("\r\n", "", safe_scrape_page($search_url));
    preg_match_all($row_pattern, $results_html, $rows, PREG_SET_ORDER);

    foreach ($rows as $row){
        // capture groups: 1 = date received, 2 = reference, 3 = address, 4 = status
        list(, $received, $reference, $location, $status) = $row;

        // only current applications are of interest
        if ($status != 'Current'){
            continue;
        }

        $application = new Application();
        $application->council_reference = $reference;
        $application->date_received = $received;
        $application->address = $location;

        // the reference doubles as the case number; nothing more can be done without it
        if ($reference == ""){
            error_log("Unable to find case number for an application at " . $search_url);
            continue;
        }

        $details_url = $info_url_base . $reference;
        $application->info_url = $details_url;
        $application->comment_url = $comment_url_base . $reference;

        // pick the postcode out of the address, if there is one
        preg_match($postcode_pattern, $application->address, $postcode_hit);
        if (isset($postcode_hit[0])){
            $application->postcode = $postcode_hit[0];
        }

        // the description lives on the details page
        $details_page = str_replace("\r\n", "", safe_scrape_page($details_url));
        preg_match($description_pattern, $details_page, $description_hit);
        if (isset($description_hit[1])){
            $application->description = $description_hit[1];
        }

        // an application without a usable postcode is dropped.
        // Coordinates are deliberately not looked up here: doing so slowed the
        // scrape down and they are added when the application is parsed anyway.
        if (is_postcode($application->postcode)){
            $found[] = $application;
        }
    }

    return $found;
}
// Council specific scrapers
/**
 * Scrape planning applications from the Islington search results page.
 *
 * For every match of the results pattern the council reference is extracted,
 * the info URL is built from it and the details page is fetched. The address
 * and postcode are then meant to be read from the details page.
 *
 * NOTE(review): as written the function never gets past the first result -
 * the print/exit statement after the details fetch ends the script. It looks
 * like debugging code left in while the scraper was being developed; confirm
 * before removing.
 *
 * @param string $search_url       URL of the search results page
 * @param string $info_url_base    prefix the council reference is appended to for the details URL
 * @param string $comment_url_base not used here (the comment_url assignment is commented out)
 * @return array list of Application objects that have a valid postcode
 */
function scrape_applications_islington ($search_url, $info_url_base, $comment_url_base){
$applications = array();
// NOTE(review): this pattern contains a ')' with no matching '(' near the end,
// which PCRE would reject - it may have been damaged; verify against a known-good copy.
$application_pattern = '/ ([^<]*)([^<]*)Search Criteria(.*)([^<]*)<(.*)>([^<]*) ([^<]*)<\/TD>([^<]*) ([^<]*) ]*)([^<]*)/';
//grab the page
$html = safe_scrape_page($search_url);
preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER);
// index 0 holds the full text of each match, one entry per application
foreach ($application_matches[0] as $application_match){
// flatten the match onto one line before looking for the reference
$application_string = str_replace("\n","", $application_match);
$reference_pattern = '/Search Results<\/a>">([^<]*)/';
preg_match_all($reference_pattern, $application_string, $reference_matches, PREG_PATTERN_ORDER);
$application = new Application();
//match the application number: take the first full match and strip the leading marker text
$application->council_reference = str_replace('Search Results">', "", $reference_matches[0][0]);
//Comment and info urls
$application->info_url = $info_url_base . $application->council_reference;
//$application->comment_url = $comment_url_base . $case_number;
//get full details
$details_html = "";
$details_html = safe_scrape_page($info_url_base . $application->council_reference);
// $details_html = str_replace("\n","", $details_html);
// $details_html = str_replace("\t","", $details_html);
//Details
// NOTE(review): dumps the details page and terminates the script, so nothing
// below this line is ever executed - presumably a debugging leftover.
print $details_html;exit;
//Address: the second capture group is taken as the address
$address_pattern = '/Main location:<\/label><\/td>([^<]*) ([^<]*)/';
// $address is assigned but not read anywhere in this function
$address = "";
preg_match($address_pattern, $details_html, $address_matches);
if(isset($address_matches[2])){
$application->address = $address_matches[2];
}
//postcode: first thing in the address that has the shape of a UK postcode
$postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
preg_match($postcode_pattern, $application->address, $postcode_matches);
if(isset($postcode_matches[0])){
$application->postcode = $postcode_matches[0];
}
//only add it if we have a postcode (bit useless otherwise)
if(is_postcode($application->postcode)){
array_push($applications, $application);
}
}
//return
return $applications;
}
//validate postcode
/**
 * Check whether a string has the shape of a UK postcode.
 *
 * Spaces are ignored and letters may be upper or lower case. Only the
 * format is checked (1-2 letters, 1-2 digits, an optional letter, one digit,
 * two letters); no lookup is made to see whether the postcode exists.
 *
 * Uses preg_match rather than ereg, which was removed in PHP 7.0.
 *
 * @param string|null $postcode candidate postcode; null is treated as empty
 * @return bool true when the format matches
 */
function is_postcode ($postcode){
    // callers may pass an unset property, so normalise to a string first
    $compact = str_replace(" ", "", (string) $postcode);

    // the D modifier makes $ match only at the very end of the string,
    // which is how the POSIX expression used previously behaved
    return preg_match('/^[a-zA-Z]{1,2}[0-9]{1,2}[a-zA-Z]{0,1}[0-9]{1}[a-zA-Z]{2}$/D', $compact) === 1;
}
//Tiny url
/**
 * Shorten a URL through the TinyURL API when it is long enough to be worth it.
 *
 * URLs shorter than $length are returned untouched. Longer ones are sent to
 * TinyURL; if the request fails or comes back empty the original URL is
 * returned, so the caller always gets something usable.
 *
 * @param string $url    the URL to shorten
 * @param int    $length URLs of at least this many characters are shortened
 * @return string the shortened URL, or $url itself
 */
function tiny_url($url,$length=30){
    // short enough already
    if (strlen($url) < $length){
        return $url;
    }

    // The URL travels as a query parameter, so it has to be encoded - otherwise
    // any '&' or '#' in it cuts the value short on the way to the API.
    // Warnings are suppressed on purpose: shortening is best effort only.
    $response = @file("http://tinyurl.com/api-create.php?url=" . urlencode($url));
    if (!is_array($response)){
        return $url;
    }

    $short_url = trim(join('', $response));

    return $short_url !== '' ? $short_url : $url;
}
//Google maps url
/**
 * Build a Google Maps search URL for a postcode.
 *
 * The postcode is lower-cased and its spaces are turned into '+' before it
 * is placed in the query string.
 *
 * @param string $postcode postcode to search for
 * @param int    $zoom     value for the z (zoom) parameter
 * @return string the maps URL
 */
function googlemap_url_from_postcode($postcode, $zoom = 15){
    $query = str_replace(" ", "+", $postcode);
    $query = strtolower($query);

    return "http://maps.google.com/maps?q={$query}&z={$zoom}";
}
//postcode to location
/**
 * Look up the X/Y location of a postcode by scraping streetmap.co.uk.
 *
 * The search page is fetched and the values assigned to the _LocationX and
 * _LocationY javascript variables are read out of it.
 *
 * @param string $postcode postcode to look up
 * @return array two elements, X at index 0 and Y at index 1; a value stays 0
 *               when the page could not be fetched or the variable is missing
 */
function postcode_to_location($postcode){
    $location = array(0, 0);

    // urlencode turns spaces into '+' and also keeps any other character
    // from breaking the query string
    $url = "http://www.streetmap.co.uk/newsearch.srf?type=Postcode&name=" . urlencode(strtolower($postcode));

    $html = file_get_contents($url);
    if ($html === false){
        // fetch failed: report "unknown" rather than matching against false
        return $location;
    }

    // the page carries lines such as "var _LocationX=123456;"
    foreach (array("X", "Y") as $index => $axis){
        if (preg_match('/var _Location' . $axis . '=(\d*);/', $html, $matches)){
            $location[$index] = $matches[1];
        }
    }

    return $location;
}
/**
 * Check whether a string looks like an email address.
 *
 * The address must be made of a local part, an '@', and a domain containing
 * at least one dot, all drawn from a fixed set of ASCII characters. This is a
 * format check only.
 *
 * Uses preg_match rather than ereg, which was removed in PHP 7.0. The
 * accepted characters are the same as before: in the POSIX bracket
 * expressions used previously a backslash was an ordinary character, so a
 * literal backslash is still accepted here.
 *
 * @param string $string candidate address
 * @return bool true when the format matches
 */
function valid_email ($string) {
    // characters allowed in the local part and after the first dot of the domain
    $with_dot = '[-!#$%&\'*+\\\\.\/0-9=?A-Z^_`a-z{|}~]';
    // same set without the dot, for the first label of the domain
    $without_dot = '[-!#$%&\'*+\\\\\/0-9=?A-Z^_`a-z{|}~]';

    // D: $ matches only at the very end, so a trailing newline is rejected
    $pattern = '/^' . $with_dot . '+@' . $without_dot . '+\.' . $with_dot . '+$/D';

    return preg_match($pattern, (string) $string) === 1;
}
/**
 * Translate an alert area size code into a distance.
 *
 * @param string $alert_area_size "s", "m" or "l"
 * @return mixed SMALL_ZONE_SIZE, MEDIUM_ZONE_SIZE or LARGE_ZONE_SIZE for the
 *               matching code, 0 for anything else
 */
function alert_size_to_meters($alert_area_size){
    // switch compares loosely, exactly like ==
    switch ($alert_area_size){
        case "s":
            return SMALL_ZONE_SIZE;
        case "m":
            return MEDIUM_ZONE_SIZE;
        case "l":
            return LARGE_ZONE_SIZE;
        default:
            return 0;
    }
}
//Send a text email
/**
 * Send a plain-text (iso-8859-1) email through mail().
 *
 * @param string $to         recipient address
 * @param string $from_name  display name for the From header
 * @param string $from_email address for the From header
 * @param string $subject    subject line
 * @param string $body       message text
 */
function send_text_email($to, $from_name, $from_email, $subject, $body){
    $header_lines = array(
        'MIME-Version: 1.0',
        'Content-type: text/plain; charset=iso-8859-1',
        'From: ' . $from_name . ' <' . $from_email . '>',
    );

    // every header line, the last one included, ends in CRLF
    $headers = implode("\r\n", $header_lines) . "\r\n";

    mail($to, $subject, $body, $headers);
}
// Format a date to mysql format
/**
 * Format a unix timestamp as a MySQL DATETIME string (YYYY-MM-DD HH:MM:SS).
 *
 * @param int $date unix timestamp
 * @return string the formatted date, in the default timezone
 */
function mysql_date($date){
    // a single colon between hours and minutes; a doubled one would give
    // "12::30:00", which is not a valid DATETIME literal
    return date("Y-m-d H:i:s", $date);
}
/**
 * Fetch a page, trying up to three times until something non-empty comes back.
 *
 * The URL is written to the error log first. The fetch goes through
 * scrape_page_pear when SCRAPE_METHOD is "PEAR" and through scrape_page_curl
 * otherwise.
 *
 * @param string $url    address of the page
 * @param string $method HTTP method handed on to the scraper
 * @return mixed the page contents, or the last empty/false result when every attempt failed
 */
function safe_scrape_page($url, $method = "GET"){
    error_log(print_r($url, true));

    $use_pear = (SCRAPE_METHOD == "PEAR");

    $page = "";
    $attempts_left = 3;

    // stop as soon as a fetch returns something that is not empty/false
    while ($attempts_left > 0 && $page == false){
        $page = $use_pear
            ? scrape_page_pear($url, $method)
            : scrape_page_curl($url, $method);
        $attempts_left--;
    }

    return $page;
}
/**
 * Fetch a page using the PEAR HTTP_Request class.
 *
 * @param string $url    address of the page
 * @param string $method HTTP method to use
 * @return mixed the response body as returned by getResponseBody()
 */
function scrape_page_pear($url, $method = "GET"){
    $options = array("method" => $method);

    $http = new HTTP_Request($url, $options);
    $http->sendRequest();

    return $http->getResponseBody();
}
/**
 * Fetch a page using curl, following redirects.
 *
 * @param string $url address of the page
 * @return mixed the page contents, or false when the transfer fails
 */
function scrape_page_curl($url) {
    $handle = curl_init($url);

    curl_setopt_array($handle, array(
        CURLOPT_RETURNTRANSFER => TRUE, // hand the body back instead of printing it
        CURLOPT_FOLLOWLOCATION => TRUE,
    ));

    return curl_exec($handle);
}
/**
 * Render a list of applications through the xml.tpl Smarty template.
 *
 * The "applications" template variable is only assigned when the list is
 * not empty.
 *
 * @param array  $applications         applications to output
 * @param string $authority_name       assigned to the template as authority_name
 * @param string $authority_short_name assigned to the template as authority_short_name
 */
function display_applications($applications, $authority_name, $authority_short_name){
    // set up smarty
    $view = new Smarty();
    $view->force_compile = true;
    $view->compile_dir = SMARTY_COMPILE_DIRECTORY;
    $view->template_dir = "../templates";

    // template variables
    $view->assign("authority_name", $authority_name);
    $view->assign("authority_short_name", $authority_short_name);
    if (count($applications) > 0){
        $view->assign("applications", $applications);
    }

    $view->display("xml.tpl");
}
/**
 * Build a timestamp for midnight on the date given by the day, month and
 * year GET parameters.
 *
 * All three parameters are required; the first one found missing is
 * reported through throw_error(), naming the parameter concerned.
 *
 * @return int timestamp as returned by mktime()
 */
function get_time_from_get(){
    $parts = array();

    // checked in this order so the first missing parameter is the one reported
    foreach (array('day', 'month', 'year') as $name){
        if (!isset($_GET[$name])){
            throw_error("No " . $name . " set in get string");
        }
        $parts[$name] = $_GET[$name];
    }

    return mktime(0, 0, 0, $parts['month'], $parts['day'], $parts['year']);
}
/**
 * Raise an Exception carrying the given message.
 *
 * @param string $message text of the exception
 * @throws Exception always
 */
function throw_error($message){
    $error = new Exception($message);

    throw $error;
}
?>