diff --git a/ghost/core/core/frontend/meta/canonical-url.js b/ghost/core/core/frontend/meta/canonical-url.js index 40e21551913f..0e4fb40740c0 100644 --- a/ghost/core/core/frontend/meta/canonical-url.js +++ b/ghost/core/core/frontend/meta/canonical-url.js @@ -1,6 +1,7 @@ const _ = require('lodash'); const urlUtils = require('../../shared/url-utils'); const getUrl = require('./url'); +const {fixAnyNonStandardChars} = require('./utils'); function getCanonicalUrl(data) { if ((_.includes(data.context, 'post') || _.includes(data.context, 'page')) @@ -17,7 +18,9 @@ function getCanonicalUrl(data) { if (url.indexOf('/amp/')) { url = url.replace(/\/amp\/$/i, '/'); } - + // fix any urls with unexpected accented characters + url = fixAnyNonStandardChars(url); + return url; } diff --git a/ghost/core/core/frontend/meta/paginated-url.js b/ghost/core/core/frontend/meta/paginated-url.js index c57adff91a79..01fa8551e012 100644 --- a/ghost/core/core/frontend/meta/paginated-url.js +++ b/ghost/core/core/frontend/meta/paginated-url.js @@ -1,5 +1,6 @@ const _ = require('lodash'); const urlUtils = require('../../shared/url-utils'); +const {fixAnyNonStandardChars} = require('./utils'); function getPaginatedUrl(page, data, absolute) { // If we don't have enough information, return null right away @@ -7,6 +8,9 @@ function getPaginatedUrl(page, data, absolute) { return null; } + // rewrite urls that might contain accented characters + data.relativeUrl = fixAnyNonStandardChars(data.relativeUrl); + // routeKeywords.page: 'page' const pagePath = urlUtils.urlJoin('/page/'); diff --git a/ghost/core/core/frontend/meta/utils.js b/ghost/core/core/frontend/meta/utils.js new file mode 100644 index 000000000000..cb5c6571427e --- /dev/null +++ b/ghost/core/core/frontend/meta/utils.js @@ -0,0 +1,28 @@ +const security = require('@tryghost/security'); +const debug = require('@tryghost/debug')('frontend'); + +function fixAnyNonStandardChars(pathOrUrl) { + let path = pathOrUrl; + let origin = ''; + + try { + path = new URL(pathOrUrl).pathname; + origin = new URL(pathOrUrl).origin; + } catch { + /* no problem, just means the existing pathOrUrl is not a URL */ + } + + let returnString = pathOrUrl; + + try { + returnString = origin + decodeURIComponent(path) + .split('/') + .map (part => security.string.safe(part)) + .join('/'); + } catch (err) { + debug('Could not decode path', path, err); + } + return returnString; +} + +module.exports = {fixAnyNonStandardChars}; \ No newline at end of file diff --git a/ghost/core/test/unit/frontend/meta/canonical-url.test.js b/ghost/core/test/unit/frontend/meta/canonical-url.test.js index 4b5f3ec79212..a4386ac37843 100644 --- a/ghost/core/test/unit/frontend/meta/canonical-url.test.js +++ b/ghost/core/test/unit/frontend/meta/canonical-url.test.js @@ -27,9 +27,9 @@ describe('getCanonicalUrl', function () { const post = testUtils.DataGenerator.forKnex.createPost(); getUrlStub.withArgs(post, false).returns('/post-url/'); - urlUtils.urlJoin.withArgs('http://localhost:9999', '/post-url/').returns('canonical url'); + urlUtils.urlJoin.withArgs('http://localhost:9999', '/post-url/').returns('http://localhost:9999/post-url/'); - getCanonicalUrl(post).should.eql('canonical url'); + getCanonicalUrl(post).should.eql('http://localhost:9999/post-url/'); urlUtils.urlJoin.calledOnce.should.be.true(); urlUtils.urlFor.calledOnce.should.be.true(); @@ -51,9 +51,9 @@ describe('getCanonicalUrl', function () { const post = testUtils.DataGenerator.forKnex.createPost(); getUrlStub.withArgs(post, false).returns('/post-url/amp/'); - urlUtils.urlJoin.withArgs('http://localhost:9999', '/post-url/amp/').returns('*/amp/'); + urlUtils.urlJoin.withArgs('http://localhost:9999', '/post-url/amp/').returns('http://localhost:9999/post-url/amp/'); - getCanonicalUrl(post).should.eql('*/'); + getCanonicalUrl(post).should.eql('http://localhost:9999/post-url/'); urlUtils.urlJoin.calledOnce.should.be.true(); urlUtils.urlFor.calledOnce.should.be.true(); @@ -62,9 +62,21 @@ describe('getCanonicalUrl', function () { it('should return home if empty secure data', function () { getUrlStub.withArgs({secure: true}, false).returns('/'); - urlUtils.urlJoin.withArgs('http://localhost:9999', '/').returns('canonical url'); + urlUtils.urlJoin.withArgs('http://localhost:9999', '/').returns('http://localhost:9999/post-url/'); - getCanonicalUrl({secure: true}).should.eql('canonical url'); + getCanonicalUrl({secure: true}).should.eql('http://localhost:9999/post-url/'); + + urlUtils.urlJoin.calledOnce.should.be.true(); + urlUtils.urlFor.calledOnce.should.be.true(); + getUrlStub.calledOnce.should.be.true(); + }); + + it('should remove any strange characters from the url', function () { + const post = testUtils.DataGenerator.forKnex.createPost(); + + getUrlStub.withArgs(post, false).returns('/post-url/strange-à-characters/'); + urlUtils.urlJoin.withArgs('http://localhost:9999', '/post-url/strange-à-characters/').returns('http://localhost:9999/strange-à-characters'); + getCanonicalUrl(post).should.eql('http://localhost:9999/strange-a-characters'); urlUtils.urlJoin.calledOnce.should.be.true(); urlUtils.urlFor.calledOnce.should.be.true();