Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experiment: add Bing integration tests #896

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
f9c8f11
Add integration tests for bing
neilbmclaughlin Jan 22, 2025
434690a
Refactor parser to use map function in prep for returning >1 result
neilbmclaughlin Jan 22, 2025
26cfd25
Remove unused parameter
neilbmclaughlin Jan 22, 2025
6d933fd
Fix broken tests
neilbmclaughlin Jan 27, 2025
d12602e
Remove redundant parameter from tests
neilbmclaughlin Jan 27, 2025
ac7a645
Refactor to allow confidence filter to be overriden
neilbmclaughlin Jan 27, 2025
8afbee1
Add the get method for location search to find by slug
neilbmclaughlin Jan 27, 2025
a162e17
Use shared utility slugify
neilbmclaughlin Jan 27, 2025
9b410a0
Remove default prefilter from parser and set it in the location service
neilbmclaughlin Jan 27, 2025
8f1d764
Fix for locations which have had their names formatted
neilbmclaughlin Jan 27, 2025
722a75f
SonarCloud: remove unused parameter
neilbmclaughlin Jan 27, 2025
f5b407d
SonarCloud: remove redundant await
neilbmclaughlin Jan 27, 2025
2f01318
Refactor to more specific use cases - find and get
neilbmclaughlin Feb 5, 2025
02e8076
Extract filter, mapper and sort functions
neilbmclaughlin Feb 6, 2025
d19d9e5
Use specific implementations for find and get
neilbmclaughlin Feb 6, 2025
1c4ac28
Move previously failing test towns/villages into passing list
neilbmclaughlin Feb 6, 2025
48866f7
Add sample counties/cities
neilbmclaughlin Feb 6, 2025
3fc8a55
Move bing results processing utilities to their own file
neilbmclaughlin Feb 6, 2025
43d8516
Add tests for slugify
neilbmclaughlin Feb 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion server/routes/alerts-and-warnings.js
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ async function locationRouteHandler (request, h) {
const canonicalUrl = request.url.origin + request.url.pathname
const location = util.cleanseLocation(request.params.location)

const [place] = await locationService.find(location)
const [place] = await locationService.get(location)

if (isLocationEngland(location)) {
return h.redirect(`/${route}`)
Expand Down
4 changes: 2 additions & 2 deletions server/routes/location.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ async function routeHandler (request, h) {
return h.redirect('/')
}

const [place] = await locationService.find(location)
const [place] = await locationService.get(location)

if (place?.slug !== location) {
if (!place) {
return boom.notFound(`Location ${location} not found`)
}

Expand Down
2 changes: 1 addition & 1 deletion server/routes/river-and-sea-levels.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ async function locationRouteHandler (request, h) {
const canonicalUrl = request.url.origin + request.url.pathname
const location = util.cleanseLocation(request.params.location)

const [place] = await locationService.find(location)
const [place] = await locationService.get(location)

if (isLocationEngland(location)) {
return h.redirect(`/${route}`)
Expand Down
258 changes: 127 additions & 131 deletions server/services/lib/bing-results-parser.js
Original file line number Diff line number Diff line change
@@ -1,114 +1,78 @@
const { addBufferToBbox, formatName } = require('../../util')
const { addBufferToBbox, formatName, slugify } = require('./bing-utils')

// source: https://en.wikipedia.org/wiki/Ceremonial_counties_of_England
// see also for a description of the difference between ceremonial and administrative counties
const englishCeremonialCounties =
[
'bedfordshire',
'berkshire',
'bristol',
'buckinghamshire',
'cambridgeshire',
'cheshire',
'city of london',
'cornwall',
'cumbria',
'derbyshire',
'devon',
'dorset',
'durham',
'east riding of yorkshire',
'east sussex',
'essex',
'gloucestershire',
'greater london',
'greater manchester',
'hampshire',
'herefordshire',
'hertfordshire',
'isle of wight',
'kent',
'lancashire',
'leicestershire',
'lincolnshire',
'merseyside',
'norfolk',
'north yorkshire',
'northamptonshire',
'northumberland',
'nottinghamshire',
'oxfordshire',
'rutland',
'shropshire',
'somerset',
'south yorkshire',
'staffordshire',
'suffolk',
'surrey',
'tyne and wear',
'warwickshire',
'west midlands',
'west sussex',
'west yorkshire',
'wiltshire',
'worcestershire'
]

function slugify (text = '') {
return text.replace(/,/g, '').replace(/ /g, '-').toLowerCase()
}

async function bingResultsParser (bingData) {
const set = bingData.resourceSets[0]
if (set.estimatedTotal === 0) {
return []
}

// following discussion with team, going to try out only high confidence
// results. This should reduce spurious results.
const allowedConfidences = ['high']

// note that allowedTypes also captures precedance rules for when multiple
// results are returned (e.g admindivision2 takes precedance over admindivision1)
const allowedTypes = [
'postcode1',
'postcode3',
'admindivision1',
'admindivision2',
'populatedplace',
'neighborhood'
[
'bedfordshire',
'berkshire',
'bristol',
'buckinghamshire',
'cambridgeshire',
'cheshire',
'city of london',
'cornwall',
'cumbria',
'derbyshire',
'devon',
'dorset',
'durham',
'east riding of yorkshire',
'east sussex',
'essex',
'gloucestershire',
'greater london',
'greater manchester',
'hampshire',
'herefordshire',
'hertfordshire',
'isle of wight',
'kent',
'lancashire',
'leicestershire',
'lincolnshire',
'merseyside',
'norfolk',
'north yorkshire',
'northamptonshire',
'northumberland',
'nottinghamshire',
'oxfordshire',
'rutland',
'shropshire',
'somerset',
'south yorkshire',
'staffordshire',
'suffolk',
'surrey',
'tyne and wear',
'warwickshire',
'west midlands',
'west sussex',
'west yorkshire',
'wiltshire',
'worcestershire'
]

function englandOnlyFilter (r) {
if (r.entityType.toLowerCase() === 'admindivision1') {
return englishCeremonialCounties.indexOf(r.name.toLowerCase()) >= 0
}

return r.address.adminDistrict.toLowerCase() === 'england'
}

const data = set.resources
.filter(r => allowedConfidences.includes(r.confidence.toLowerCase()))
.filter(r => allowedTypes.includes(r.entityType.toLowerCase()))
.filter(r => englandOnlyFilter(r))
.sort((a, b) =>
allowedTypes.indexOf(a.entityType.toLowerCase()) -
allowedTypes.indexOf(b.entityType.toLowerCase()))
.sort((a, b) =>
allowedConfidences.indexOf(a.confidence.toLowerCase()) -
allowedConfidences.indexOf(b.confidence.toLowerCase()))[0]

if (!data) {
return []
}

const {
bbox,
point: { coordinates: center }
} = data

const name = formatName(data.name)
// Bing entity types that are accepted as search results (compared in lower case).
// The order of this list also defines precedence when multiple results are
// returned: typesSort orders results by ascending index in this list, so
// entries nearer the top are returned first.
// NOTE(review): the previous wording of this comment said admindivision2 takes
// precedence over admindivision1, which is the opposite of what the ascending
// index sort produces - confirm the intended order.
const allowedTypes = [
'postcode1',
'postcode3',
'admindivision1',
'admindivision2',
'populatedplace',
'neighborhood'
]

// Buffer distances, in metres, applied to a result's bounding box by the
// mapper to produce its bbox2k and bbox10k properties.
const distanceInMetres = {
'2k': 2000,
'10k': 10000
}

const mapper = (r) => {
const name = formatName(r.name)
const bbox = r.bbox.reverse()
// query is the value to use in a search box or the slug to replicate the
// search and get the same result. If the bing format of the name (place
// name + postcode) is used then some postcode searches which were
Expand All @@ -118,39 +82,71 @@ async function bingResultsParser (bingData) {
// This causes problems with validity checking
// Retained both name and query for display purposes for post codes
// (even though name and query are the same for non-postcodes)
const query = ['postcode1', 'postcode3'].includes(data.entityType.toLowerCase())
? data.address.postalCode
const query = ['postcode1', 'postcode3'].includes(r.entityType.toLowerCase())
? r.address.postalCode
: name

const slug = slugify(query)

// Reverse as Bing returns as [y (lat), x (long)]
bbox.reverse()
center.reverse()

const isUK = data.address.countryRegionIso2 === 'GB'
return {
name,
query,
slug: slugify(query),
center: r.point.coordinates.reverse(),
bbox2k: addBufferToBbox(bbox, distanceInMetres['2k']),
bbox10k: addBufferToBbox(bbox, distanceInMetres['10k']),
isUK: r.address.countryRegionIso2 === 'GB',
isEngland: { is_england: true }
}
}

// const isEngland = await getIsEngland(center[0], center[1])
// Keeps only results whose confidence is 'high' (case-insensitive).
const confidenceFilter = ({ confidence }) => {
  const level = confidence.toLowerCase()
  return level === 'high'
}

const distanceInMetres = {
'2k': 2000,
'10k': 10000
const englandOnlyFilter = (r) => {
if (r.entityType.toLowerCase() === 'admindivision1') {
return englishCeremonialCounties.indexOf(r.name.toLowerCase()) >= 0
}

// add buffer to place.bbox for stations search
const bbox2k = addBufferToBbox(bbox, distanceInMetres['2k'])
const bbox10k = addBufferToBbox(bbox, distanceInMetres['10k'])
return r.address.adminDistrict?.toLowerCase() === 'england'
}

return [{
name,
slug,
query,
center,
bbox2k,
bbox10k,
isUK,
isEngland: { is_england: true }
}]
// Keeps only results whose entity type (case-insensitive) is listed in allowedTypes.
const allowedTypesFilter = ({ entityType }) => {
  const type = entityType.toLowerCase()
  return allowedTypes.includes(type)
}

// Filter shared by find and get: the result must be of an allowed type
// and must pass the England-only check.
const baseFilter = (r) => {
  if (!allowedTypesFilter(r)) {
    return false
  }
  return englandOnlyFilter(r)
}

// Comparator ordering results by the position of their entity type in
// allowedTypes (types nearer the start of the list sort first).
const typesSort = (a, b) => {
  const rank = ({ entityType }) => allowedTypes.indexOf(entityType.toLowerCase())
  return rank(a) - rank(b)
}

// Array#filter callback which keeps a place only if it is the first entry
// in the array carrying that slug.
const removeDuplicatesFilter = (place, index, self) => {
  const firstIndexWithSlug = self.findIndex(({ slug }) => slug === place.slug)
  return firstIndexWithSlug === index
}

// Parse a Bing response into the list of places matching a search.
// Only high confidence results of an allowed type located in England are
// kept; they are ordered by entity type precedence, mapped to place objects
// and de-duplicated by slug.
// Resolves to an empty array when the response has no resource sets or the
// first resource set reports no results.
async function find (bingResponse) {
  // Guard against a missing/empty resourceSets array so a response without
  // any resource set yields "no results" instead of a TypeError.
  const set = bingResponse?.resourceSets?.[0]
  if (!set?.estimatedTotal) {
    return []
  }

  return set.resources
    .filter(confidenceFilter)
    .filter(baseFilter)
    .sort(typesSort)
    .map(mapper)
    .filter(removeDuplicatesFilter)
}

module.exports = bingResultsParser
// Parse a Bing response and return the place(s) whose slug equals the
// given slug. Unlike find, no confidence filter is applied: results only
// need to be of an allowed type and located in England.
// Resolves to an empty array when the response has no resource sets, the
// first resource set reports no results, or no place matches the slug.
async function get (bingResponse, slug) {
  // Guard against a missing/empty resourceSets array so a response without
  // any resource set yields "no results" instead of a TypeError.
  const set = bingResponse?.resourceSets?.[0]
  if (!set?.estimatedTotal) {
    return []
  }

  const matchingSlugFilter = (r) => r.slug === slug

  return set.resources
    .filter(baseFilter)
    .sort(typesSort)
    .map(mapper)
    .filter(removeDuplicatesFilter)
    .filter(matchingSlugFilter)
}

module.exports = {
find,
get
}
48 changes: 48 additions & 0 deletions server/services/lib/bing-utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
const turf = require('@turf/turf')

// Collapse repeated comma-separated entries in a place name, keeping the
// first occurrence of each (entries are trimmed before comparison) and
// re-joining them with ', '.
function removeRepeatingEntries (inputString) {
  const seen = new Set()
  for (const entry of inputString.split(',')) {
    seen.add(entry.trim())
  }
  return Array.from(seen).join(', ')
}

// Returns true when the second item is the first item prefixed with
// 'Greater' or 'City Of' (case-insensitive),
// e.g. ['London', 'Greater London'] or ['Portsmouth', 'City Of Portsmouth'].
function hasCityQualifier (itemsArray) {
  const [place, candidate] = itemsArray
  // The place name is data, not a pattern: escape regex metacharacters so
  // names such as 'St. Albans' match literally and names containing
  // characters like '(' cannot produce an invalid regular expression.
  const escapedPlace = String(place).replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  const regex = new RegExp(`^(Greater|City Of) ${escapedPlace}$`, 'i')
  return regex.test(candidate)
}

// Drop a trailing qualifier entry such as 'Greater London' or
// 'City Of Portsmouth' when it merely qualifies the entry before it.
// e.g. Camberwell, London, Greater London => Camberwell, London
// e.g. London, Greater London => London
function removeCityQualifiers (inputString) {
  const separator = ', '
  const parts = inputString.split(separator)

  // A single entry has nothing that could qualify it
  if (parts.length < 2) {
    return inputString
  }

  // Only the last two entries are compared
  const lastTwo = parts.slice(-2)
  if (!hasCityQualifier(lastTwo)) {
    return inputString
  }

  return parts.slice(0, -1).join(separator)
}

// Format a place name for display: repeated entries are removed first, then
// any trailing city qualifier. A falsy name yields an empty string.
function formatName (name) {
  return name
    ? removeCityQualifiers(removeRepeatingEntries(name))
    : ''
}

// Turn a place name or query into a slug: commas are removed, each space
// becomes a hyphen and the result is lower-cased.
function slugify (text = '') {
  const withoutCommas = text.split(',').join('')
  const hyphenated = withoutCommas.split(' ').join('-')
  return hyphenated.toLowerCase()
}

// Expand a bbox (bounding box) by a buffer of m metres: the bbox is converted
// into a polygon, the buffer is added, and the result is converted back to a
// bbox because the db query needs a bbox envelope.
function addBufferToBbox (bbox, m) {
  const polygon = turf.bboxPolygon(bbox)
  const buffered = turf.buffer(polygon, m, { units: 'meters' })
  return turf.bbox(buffered)
}

module.exports = {
formatName,
slugify,
addBufferToBbox
}
Loading