Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent accented URLs from showing up in site metadata. #22094

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion ghost/core/core/frontend/meta/canonical-url.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
const _ = require('lodash');
const urlUtils = require('../../shared/url-utils');
const getUrl = require('./url');
const {fixAnyNonStandardChars} = require('./utils');

function getCanonicalUrl(data) {
if ((_.includes(data.context, 'post') || _.includes(data.context, 'page'))
Expand All @@ -17,7 +18,9 @@ function getCanonicalUrl(data) {
if (url.indexOf('/amp/')) {
url = url.replace(/\/amp\/$/i, '/');
}

// fix any urls with unexpected accented characters
url = fixAnyNonStandardChars(url);

return url;
}

Expand Down
4 changes: 4 additions & 0 deletions ghost/core/core/frontend/meta/paginated-url.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
const _ = require('lodash');
const urlUtils = require('../../shared/url-utils');
const {fixAnyNonStandardChars} = require('./utils');

function getPaginatedUrl(page, data, absolute) {
// If we don't have enough information, return null right away
if (!data || !data.relativeUrl || !data.pagination) {
return null;
}

// rewrite urls that might contain accented characters
data.relativeUrl = fixAnyNonStandardChars(data.relativeUrl);
Comment on lines +11 to +12
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Consider applying URL sanitization to the final URL instead of modifying input data.

The current implementation modifies the input data directly, which could have unintended side effects in subsequent operations. Consider moving the sanitization to the end of the function.

Apply this change:

 function getPaginatedUrl(page, data, absolute) {
     // If we don't have enough information, return null right away
     if (!data || !data.relativeUrl || !data.pagination) {
         return null;
     }
 
-    // rewrite urls that might contain accented characters
-    data.relativeUrl = fixAnyNonStandardChars(data.relativeUrl);
-
     // routeKeywords.page: 'page'
     const pagePath = urlUtils.urlJoin('/page/');
     
     // ... rest of the function ...
     
-    return urlUtils.urlFor({relativeUrl: newRelativeUrl}, absolute);
+    const finalUrl = urlUtils.urlFor({relativeUrl: newRelativeUrl}, absolute);
+    return fixAnyNonStandardChars(finalUrl);
 }

Committable suggestion skipped: line range outside the PR's diff.


// routeKeywords.page: 'page'
const pagePath = urlUtils.urlJoin('/page/');

Expand Down
28 changes: 28 additions & 0 deletions ghost/core/core/frontend/meta/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
const security = require('@tryghost/security');
const debug = require('@tryghost/debug')('frontend');

/**
 * Sanitise every segment of a URL's path (or of a bare relative path) by
 * percent-decoding it and passing each `/`-separated segment through
 * `security.string.safe`.
 *
 * For an absolute URL the origin is kept as-is, only the pathname is
 * rewritten, and the query string and fragment are re-attached unchanged.
 * Input that is not an absolute URL is treated as a path in its entirety
 * (any query/fragment on it is not split off and goes through the same
 * per-segment sanitisation).
 *
 * @param {string} pathOrUrl - absolute URL or relative path
 * @returns {string} the sanitised URL/path, or the original value when it is
 *   not a string or when decoding/sanitising fails
 */
function fixAnyNonStandardChars(pathOrUrl) {
    // decodeURIComponent would coerce non-strings (e.g. undefined -> "undefined"),
    // so hand anything that is not a string back untouched
    if (typeof pathOrUrl !== 'string') {
        return pathOrUrl;
    }

    let path = pathOrUrl;
    let origin = '';
    let suffix = '';

    try {
        // Parse once and reuse the result
        const parsed = new URL(pathOrUrl);

        // URLs with an opaque origin (e.g. "foo:bar") serialise their origin as
        // the literal string "null"; treat those as plain paths instead of
        // prefixing "null" to the result
        if (parsed.origin !== 'null') {
            path = parsed.pathname;
            origin = parsed.origin;
            // keep the query string and fragment instead of dropping them
            suffix = parsed.search + parsed.hash;
        }
    } catch {
        /* no problem, just means the existing pathOrUrl is not a URL */
    }

    try {
        const safePath = decodeURIComponent(path)
            .split('/')
            .map(part => security.string.safe(part))
            .join('/');

        return origin + safePath + suffix;
    } catch (err) {
        // Malformed percent-encoding (or a failure in the sanitiser):
        // best effort, fall back to the input as given
        debug('Could not decode path', path, err);
        return pathOrUrl;
    }
}

module.exports = {fixAnyNonStandardChars};
24 changes: 18 additions & 6 deletions ghost/core/test/unit/frontend/meta/canonical-url.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ describe('getCanonicalUrl', function () {
const post = testUtils.DataGenerator.forKnex.createPost();

getUrlStub.withArgs(post, false).returns('/post-url/');
urlUtils.urlJoin.withArgs('http://localhost:9999', '/post-url/').returns('canonical url');
urlUtils.urlJoin.withArgs('http://localhost:9999', '/post-url/').returns('http://localhost:9999/post-url/');

getCanonicalUrl(post).should.eql('canonical url');
getCanonicalUrl(post).should.eql('http://localhost:9999/post-url/');

urlUtils.urlJoin.calledOnce.should.be.true();
urlUtils.urlFor.calledOnce.should.be.true();
Expand All @@ -51,9 +51,9 @@ describe('getCanonicalUrl', function () {
const post = testUtils.DataGenerator.forKnex.createPost();

getUrlStub.withArgs(post, false).returns('/post-url/amp/');
urlUtils.urlJoin.withArgs('http://localhost:9999', '/post-url/amp/').returns('*/amp/');
urlUtils.urlJoin.withArgs('http://localhost:9999', '/post-url/amp/').returns('http://localhost:9999/post-url/amp/');

getCanonicalUrl(post).should.eql('*/');
getCanonicalUrl(post).should.eql('http://localhost:9999/post-url/');

urlUtils.urlJoin.calledOnce.should.be.true();
urlUtils.urlFor.calledOnce.should.be.true();
Expand All @@ -62,9 +62,21 @@ describe('getCanonicalUrl', function () {

it('should return home if empty secure data', function () {
getUrlStub.withArgs({secure: true}, false).returns('/');
urlUtils.urlJoin.withArgs('http://localhost:9999', '/').returns('canonical url');
urlUtils.urlJoin.withArgs('http://localhost:9999', '/').returns('http://localhost:9999/post-url/');

getCanonicalUrl({secure: true}).should.eql('canonical url');
getCanonicalUrl({secure: true}).should.eql('http://localhost:9999/post-url/');

urlUtils.urlJoin.calledOnce.should.be.true();
urlUtils.urlFor.calledOnce.should.be.true();
getUrlStub.calledOnce.should.be.true();
});

it('should remove any strange characters from the url', function () {
const post = testUtils.DataGenerator.forKnex.createPost();

getUrlStub.withArgs(post, false).returns('/post-url/strange-à-characters/');
urlUtils.urlJoin.withArgs('http://localhost:9999', '/post-url/strange-à-characters/').returns('http://localhost:9999/strange-à-characters');
getCanonicalUrl(post).should.eql('http://localhost:9999/strange-a-characters');

urlUtils.urlJoin.calledOnce.should.be.true();
urlUtils.urlFor.calledOnce.should.be.true();
Expand Down