From ca5588706b33e48a88acb5f3f0ea6b94f965fdfe Mon Sep 17 00:00:00 2001 From: Adrian Short Date: Tue, 25 Sep 2018 21:48:56 +0100 Subject: [PATCH] Improve regex so it doesn't parse "Not Available" as a date (Lewisham) Should we even bother parsing all these fields from the search results pages given that we'll get them on the deeper scrape anyway? --- lib/uk_planning_scraper/idox.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/uk_planning_scraper/idox.rb b/lib/uk_planning_scraper/idox.rb index fd23eb6..191fee1 100644 --- a/lib/uk_planning_scraper/idox.rb +++ b/lib/uk_planning_scraper/idox.rb @@ -74,11 +74,11 @@ module UKPlanningScraper data[:council_reference] = matches[1] end - if matches = bit.match(/(Received|Registered):\s+(.+)/) + if matches = bit.match(/(Received|Registered):\s+.*(\d{2}\s\w{3}\s\d{2}\d{2}?)/) data[:date_received] = Date.parse(matches[2]) end - if matches = bit.match(/Validated:\s+(.+)/) + if matches = bit.match(/Validated:\s+.*(\d{2}\s\w{3}\s\d{2}\d{2}?)/) data[:date_validated] = Date.parse(matches[1]) end