You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

72 lines
2.7 KiB

  1. require "merton_planning_formatter/version"
  2. require 'pragmatic_segmenter'
  # Normalises the capitalisation of free text: each sentence is converted to
  # sentence case, then a fixed list of abbreviations, place names, use
  # classes, tree references and case reference numbers is restored to its
  # conventional capitalisation.
  # NOTE(review): going by the module name the input is presumably planning
  # application text for the London Borough of Merton - confirm with callers.
  module MertonPlanningFormatter
    class Error < StandardError; end

    # Ordered [pattern, replacement] pairs applied to every sentence.
    #
    # Each sentence is lower-cased with only its first character capitalised
    # before these rules run, so patterns accept an uppercase or lowercase
    # alternative for the first letter to ensure that they still match at the
    # start of a sentence.
    #
    # Order matters: a single-word rule (e.g. "Morden") may run before the
    # multi-word rule that extends it (e.g. "Morden Park"); the longer rule
    # still matches because it accepts the already-capitalised first letter.
    REPLACEMENTS = [
      # Abbreviations
      [/\b[lL]bm\b/, 'LBM'],
      [/\b[lL]b\b/, 'LB'],
      [/\b[wW]c\b/, 'WC'],
      [/\b[lL]ondon borough of merton\b/, 'London Borough of Merton'],
      [/\b[mM]erton\b/, 'Merton'],

      # Districts
      # https://en.wikipedia.org/wiki/London_Borough_of_Merton#Districts
      [/\b[bB]ushey mead\b/, 'Bushey Mead'],
      [/\b[cC]olliers wood\b/, 'Colliers Wood'],
      [/\b[cC]opse hill\b/, 'Copse Hill'],
      [/\b[cC]ottenham park\b/, 'Cottenham Park'],
      [/\b[cC]rooked bill?ett?\b/, 'Crooked Billet'],
      [/\b[lL]ower morden\b/, 'Lower Morden'],
      [/\b[mM]erton park\b/, 'Merton Park'],
      [/\b[mM]itcham\b/, 'Mitcham'],
      [/\b[mM]itcham common\b/, 'Mitcham Common'],
      [/\b[mM]orden\b/, 'Morden'],
      [/\b[mM]orden park\b/, 'Morden Park'],
      [/\b[mM]otspur park\b/, 'Motspur Park'],
      [/\b[nN]ew malden\b/, 'New Malden'],
      [/\b[nN]orbury\b/, 'Norbury'],
      [/\b[pP]ollards hill\b/, 'Pollards Hill'],
      [/\b[rR]aynes park\b/, 'Raynes Park'],
      # The second word must be matched in lowercase: the text has already
      # been lower-cased, so a pattern containing "Helier" can never match.
      [/\b[sS]t\.? helier\b/, 'St. Helier'],
      # "wimbledon" is accepted in either case so this rule does not depend on
      # running before the standalone Wimbledon rule below.
      [/\b[sS]outh [wW]imbledon\b/, 'South Wimbledon'],
      [/\b[sS]ummerstown\b/, 'Summerstown'],
      [/\b[wW]imbledon\b/, 'Wimbledon'],
      [/\b[wW]imbledon park\b/, 'Wimbledon Park'],

      # Use classes
      # https://www.planningportal.co.uk/info/200130/common_projects/9/change_of_use
      [/\b[aA](\d)\b/, 'A\1'],
      [/\b[bB](\d)([abc]?)\b/, 'B\1\2'],
      [/\b[cC](\d)([abc]?)\b/, 'C\1\2'],
      [/\b[dD](\d)\b/, 'D\1'],

      # Trees
      [/\b[tT]po\b/, 'TPO'],
      [/\b[tT](\d+)\b/, 'T\1'],
      [/\b[mM]er(\d+)\b/, 'MER\1'],
      [/\b[mM](\d+)\b/, 'M\1'],

      # Case reference numbers, eg 18/P1234
      [%r{\b(\d{2})/p(\d+)\b}, '\1/P\2']
    ].freeze
    private_constant :REPLACEMENTS

    # Splits the text into sentences, normalises each one and joins the
    # results with a single space.
    #
    # @param s [String] the text to format
    # @return [String] the formatted text
    def self.format(s)
      segmenter = PragmaticSegmenter::Segmenter.new(text: s, language: 'en')
      segmenter.segment.map { |sentence| format_sentence(sentence) }.join(' ')
    end

    # Converts one sentence to sentence case and then applies every rule in
    # REPLACEMENTS, in order.
    #
    # @param sentence [String] a single sentence
    # @return [String] a new string; the argument is not modified
    def self.format_sentence(sentence)
      REPLACEMENTS.reduce(sentence.downcase.capitalize) do |text, (pattern, replacement)|
        text.gsub(pattern, replacement)
      end
    end
    private_class_method :format_sentence
  end