-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmap.php
70 lines (56 loc) · 1.42 KB
/
map.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
<?php
/**
* SEO Mapper
* Maps old URLs (alt.csv) to new URLs (neu.csv) on a best-effort basis, e.g. to create a redirect list for a relaunch.
*/
// Load both URL lists, one entry per line. Line endings are kept here; the
// consumers trim each entry before use.
$old = file('alt.csv');
$new = file('neu.csv');
if ($old === false || $new === false) {
    // file() returns false (and has already raised a warning naming the file)
    // when a list cannot be read. Stop here instead of emitting a redirect list
    // with empty targets.
    error_log('SEO Mapper: alt.csv and neu.csv must both be readable.');
    exit(1);
}
/**
 * Break a URL down into its lowercase keywords.
 *
 * The literal "index.php" is stripped, the remainder is URL-decoded, and every
 * run of four or more Unicode letters is collected. Digits, punctuation and
 * shorter runs are discarded.
 *
 * @param string $str URL (or any other string) to tokenize
 *
 * @return string[] lowercased words in order of appearance, duplicates included
 */
function extractWords($str)
{
    $decoded = urldecode(str_replace('index.php', '', $str));

    // Index 0 of the result holds the full-pattern matches, i.e. the words.
    preg_match_all('/\pL{4,}/iu', $decoded, $found);

    return array_map('mb_strtolower', $found[0]);
}
/**
 * Absolute number of matching words between two URLs.
 *
 * This function was previously named match(). `match` is a reserved keyword as
 * of PHP 8.0, so declaring a function with that name is a parse error there;
 * hence the rename.
 *
 * @param string $old old URL
 * @param string $new candidate new URL
 *
 * @return int number of words of $old (duplicates counted) that also occur in $new
 */
function countMatches($old, $new)
{
    return count(array_intersect(extractWords($old), extractWords($new)));
}

/**
 * Find the best matching URL for an old URL among an array of new URLs.
 *
 * Every candidate is scored with countMatches(); the candidate with the highest
 * score wins. On a tie the candidate that appears first in $new is kept.
 *
 * @param string   $oldUrl old URL to find a replacement for
 * @param string[] $new    candidate new URLs (surrounding whitespace is allowed)
 *
 * @return string the trimmed best candidate, or 'default' when $new is empty or
 *                no candidate shares a single word with $oldUrl
 */
function findBestFit($oldUrl, $new)
{
    $bestUrl = null;
    $bestScore = 0;

    foreach ($new as $newUrl) {
        $score = countMatches($oldUrl, $newUrl);

        // Strict ">" means a score of 0 never wins and ties keep the earliest
        // candidate, so the result does not depend on sort stability.
        if ($score > $bestScore) {
            $bestScore = $score;
            $bestUrl = $newUrl;
        }
    }

    if ($bestUrl === null) {
        return 'default';
    }

    return trim($bestUrl);
}
// Emit one CSV row per old URL: "<old URL>","<best matching new URL or 'default'>".
foreach ($old as $oldUrl) {
    // Optional: strip the old host first so it does not take part in the matching.
    # $oldUrl = str_replace('http://example-old.com', '', $oldUrl);
    $oldUrl = trim($oldUrl);
    echo '"' . $oldUrl . '","' . findBestFit($oldUrl, $new) . '"' . PHP_EOL;
}