Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: projected traffic calculation for meta-tags #573

Open
wants to merge 62 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
2a5de33
fix: rum api integration
dipratap Jan 9, 2025
d7efa09
fix: query method
dipratap Jan 9, 2025
fb3021c
fix: process rum traffic data
dipratap Jan 9, 2025
714a0ed
fix: process rum traffic data
dipratap Jan 9, 2025
3ca4c8c
fix: error
dipratap Jan 9, 2025
2937bf9
fix: error
dipratap Jan 9, 2025
ec12a64
fix: percentage increase based on issue
dipratap Jan 9, 2025
6a1d468
fix: percentage increase based on issue
dipratap Jan 9, 2025
47ac390
fix: error
dipratap Jan 9, 2025
90ec06e
fix: traffic
dipratap Jan 10, 2025
c237ee7
fix: full audit ref remove
dipratap Jan 10, 2025
a9ddcfb
fix: error
dipratap Jan 10, 2025
701f8a0
fix: error
dipratap Jan 10, 2025
b944489
fix: update interval
dipratap Jan 10, 2025
0438f36
fix: add log
dipratap Jan 10, 2025
2fec3cc
Merge branch 'main' into metrics-automate
solaris007 Jan 11, 2025
76c0296
fix: traffic
dipratap Jan 13, 2025
a0f87d1
Merge branch 'metrics-automate' of github.com:adobe/spacecat-audit-wo…
dipratap Jan 13, 2025
d741271
fix: traffic
dipratap Jan 13, 2025
3b306fe
fix: traffic
dipratap Jan 13, 2025
0d553d6
fix: refactoring
dipratap Jan 14, 2025
0e63f39
fix: test
dipratap Jan 14, 2025
86245ec
fix: calculate cpc value and add tests
dipratap Jan 15, 2025
a34e9bf
fix: add comments and some fixes
dipratap Jan 16, 2025
d2f5967
fix: tests
dipratap Jan 16, 2025
b1d36e5
fix: tests
dipratap Jan 16, 2025
4032765
fix: tests
dipratap Jan 16, 2025
fbec486
fix: tests
dipratap Jan 17, 2025
70c21ae
fix: tests
dipratap Jan 17, 2025
2b7fbff
fix: tests
dipratap Jan 17, 2025
026bc68
fix: tests
dipratap Jan 17, 2025
37ef497
fix: tests
dipratap Jan 17, 2025
c9ca2e7
fix: tests
dipratap Jan 17, 2025
125ca4c
fix: tests
dipratap Jan 17, 2025
841d113
fix: tests
dipratap Jan 17, 2025
9beae9e
fix: tests
dipratap Jan 17, 2025
b8472a0
fix: tests
dipratap Jan 17, 2025
c7791dc
fix: tests
dipratap Jan 17, 2025
9bf709a
fix: tests
dipratap Jan 17, 2025
f157a6d
fix: tests
dipratap Jan 17, 2025
a52680a
fix: tests
dipratap Jan 17, 2025
2c1870f
fix: update oppty
dipratap Jan 20, 2025
e4013b8
fix: update oppty
dipratap Jan 20, 2025
415404e
fix: test
dipratap Jan 20, 2025
3a766e2
fix: add comment to explain calculation
dipratap Jan 21, 2025
12c4bd0
fix: lint error
dipratap Jan 21, 2025
d7a0dec
fix: log message
dipratap Jan 28, 2025
2967fc7
fix: merge conflicts
dipratap Jan 28, 2025
87e274f
fix: use earned traffic only
dipratap Jan 28, 2025
9233147
fix: use earned traffic only
dipratap Jan 28, 2025
2fde9db
fix: set rum time duration
dipratap Feb 4, 2025
d9fcccd
fix: tests
dipratap Feb 4, 2025
957c2eb
fix: log
dipratap Feb 7, 2025
dc99049
fix: log
dipratap Feb 7, 2025
0e913d9
fix: log
dipratap Feb 7, 2025
7c2fd75
fix: log
dipratap Feb 7, 2025
101bf6d
fix: log
dipratap Feb 7, 2025
4956e70
fix: log
dipratap Feb 9, 2025
702cd29
fix: log
dipratap Feb 9, 2025
544743c
fix: log
dipratap Feb 9, 2025
f545af8
fix: log
dipratap Feb 9, 2025
12ca449
fix: log
dipratap Feb 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 90 additions & 5 deletions src/metatags/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
* governing permissions and limitations under the License.
*/

import RUMAPIClient from '@adobe/spacecat-shared-rum-api-client';
import { getObjectFromKey, getObjectKeysUsingPrefix } from '../utils/s3-utils.js';
import SeoChecks from './seo-checks.js';
import { AuditBuilder } from '../common/audit-builder.js';
import { noopUrlResolver } from '../common/audit.js';
import convertToOpportunity from './opportunityHandler.js';
import { calculateCPCValue, getRUMDomainkey } from '../support/utils.js';
import { noopUrlResolver, wwwUrlResolver } from '../common/audit.js';
import { AuditBuilder } from '../common/audit-builder.js';

export async function fetchAndProcessPageObject(s3Client, bucketName, key, prefix, log) {
const object = await getObjectFromKey(s3Client, bucketName, key, log);
Expand All @@ -32,6 +34,85 @@ export async function fetchAndProcessPageObject(s3Client, bucketName, key, prefi
};
}

/**
 * Returns the path component of an absolute URL with a single trailing
 * slash stripped (so the root path "/" becomes an empty string).
 * Query string and fragment are not part of the result.
 *
 * @param {string} url - Absolute URL; `new URL` throws if it cannot be parsed.
 * @returns {string} Pathname without a trailing slash.
 */
function extractEndpoint(url) {
  const { pathname } = new URL(url);
  return pathname.endsWith('/') ? pathname.slice(0, -1) : pathname;
}

/**
 * Indexes two RUM result lists by endpoint so that later lookups are a
 * single `Map.get`. The key is `extractEndpoint(item.url)`; when several
 * items resolve to the same endpoint the last one in the list wins.
 *
 * @param {Array<{url: string}>} rumDataMonthly - Items for the monthly window.
 * @param {Array<{url: string}>} rumDataBiMonthly - Items for the bi-monthly window.
 * @returns {{rumDataMapMonthly: Map<string, object>, rumDataMapBiMonthly: Map<string, object>}}
 */
function preprocessRumData(rumDataMonthly, rumDataBiMonthly) {
  // Shared indexing step for both lists.
  const indexByEndpoint = (entries) => {
    const byEndpoint = new Map();
    entries.forEach((entry) => {
      byEndpoint.set(extractEndpoint(entry.url), entry);
    });
    return byEndpoint;
  };

  const rumDataMapMonthly = indexByEndpoint(rumDataMonthly);
  const rumDataMapBiMonthly = indexByEndpoint(rumDataBiMonthly);
  return { rumDataMapMonthly, rumDataMapBiMonthly };
}

/**
 * Looks up the page views recorded for an endpoint in the preprocessed RUM maps.
 *
 * The monthly map is consulted first and the bi-monthly map is used only as a
 * fallback. The returned figure is `earned + paid` of the matched item.
 * NOTE(review): despite the "organic" in the name, paid traffic is included
 * in the sum — confirm this is intended.
 *
 * @param {string} endpoint - Page path; a single trailing slash is ignored.
 * @param {Map<string, object>} rumDataMapMonthly - Monthly items keyed by endpoint.
 * @param {Map<string, object>} rumDataMapBiMonthly - Bi-monthly items keyed by endpoint.
 * @param {object} log - Logger exposing `warn` and `info`.
 * @returns {number} Page views for the endpoint, or 0 when no item matches.
 */
function getOrganicTrafficForEndpoint(endpoint, rumDataMapMonthly, rumDataMapBiMonthly, log) {
  // Map keys carry no trailing slash, so normalise the lookup key once.
  const lookupKey = endpoint.replace(/\/$/, '');
  const target = rumDataMapMonthly.get(lookupKey) || rumDataMapBiMonthly.get(lookupKey);
  if (!target) {
    log.warn(`No rum data found for ${endpoint}.`);
    return 0;
  }
  // A missing or non-numeric counter must not turn the sum into NaN, because
  // the caller accumulates this value across all endpoints.
  const toCount = (value) => {
    const count = Number(value);
    return Number.isFinite(count) ? count : 0;
  };
  const trafficSum = toCount(target.earned) + toCount(target.paid);
  log.info(`Found ${trafficSum} page views for ${endpoint}.`);
  return trafficSum;
}

/**
 * Estimates the traffic a site is losing because of detected meta-tag issues.
 *
 * Runs the RUM `traffic-acquisition` query for a 30-day and a 60-day interval,
 * indexes both results by endpoint, and for every detected issue adds a share
 * of that endpoint's page views to the total: 1% when the issue text contains
 * "Missing", 0.5% otherwise.
 *
 * @param {object} context - Context passed to `RUMAPIClient.createFrom` and `getRUMDomainkey`.
 * @param {object} site - Site object; `getBaseURL()` is read and the site is
 *   handed to `wwwUrlResolver`.
 * @param {Object<string, Object<string, {issue: string}>>} detectedTags - Issues
 *   per tag, keyed by endpoint.
 * @param {object} log - Logger exposing `debug`, `info` and `warn`.
 * @returns {Promise<number>} Projected traffic lost, summed over all issues.
 */
async function calculateProjectedTraffic(context, site, detectedTags, log) {
  const rumAPIClient = RUMAPIClient.createFrom(context);
  const domainkey = await getRUMDomainkey(site.getBaseURL(), context);
  const options = {
    domain: wwwUrlResolver(site),
    domainkey,
    interval: 30,
    granularity: 'DAILY',
  };
  // The two windows do not depend on each other, so query them concurrently.
  const [queryResultsMonthly, queryResultsBiMonthly] = await Promise.all([
    rumAPIClient.query('traffic-acquisition', options),
    rumAPIClient.query('traffic-acquisition', { ...options, interval: 60 }),
  ]);
  const { rumDataMapMonthly, rumDataMapBiMonthly } = preprocessRumData(
    queryResultsMonthly,
    queryResultsBiMonthly,
  );
  let projectedTraffic = 0;
  // Diagnostic output only: keep it at debug level so it does not show up as warnings.
  log.debug(`Detected Tags: ${JSON.stringify(detectedTags)}`);
  Object.entries(detectedTags ?? {}).forEach(([endpoint, tags]) => {
    log.debug(`Checking for endpoint: ${endpoint} !!`);
    const organicTraffic = getOrganicTrafficForEndpoint(
      endpoint,
      rumDataMapMonthly,
      rumDataMapBiMonthly,
      log,
    );
    log.debug(`traffic for endpoint: ${endpoint} : ${organicTraffic} !!`);
    Object.values(tags ?? {}).forEach((tagIssueDetails) => {
      // Multiplying by 1% for missing tags, and 0.5% for other tag issues.
      // For duplicate tags, each page's traffic is multiplied by 0.5%, so
      // it amounts to 0.5% * number of duplicates.
      const issue = tagIssueDetails?.issue;
      // An entry without a textual issue is treated like any non-missing issue
      // instead of aborting the whole calculation.
      const isMissingTag = typeof issue === 'string' && issue.includes('Missing');
      projectedTraffic += organicTraffic * (isMissingTag ? 0.01 : 0.005);
    });
  });
  return projectedTraffic;
}

export async function auditMetaTagsRunner(baseURL, context, site) {
const { log, s3Client } = context;
// Fetch site's scraped content from S3
Expand Down Expand Up @@ -59,14 +140,18 @@ export async function auditMetaTagsRunner(baseURL, context, site) {
}
seoChecks.finalChecks();
const detectedTags = seoChecks.getDetectedTags();

const projectedTrafficLost = await calculateProjectedTraffic(context, site, detectedTags, log);
const cpcValue = await calculateCPCValue(context, site.getId());
log.info(`Calculated cpc value: ${cpcValue} for site: ${site.getId()}`);
const projectedTrafficValue = projectedTrafficLost * cpcValue;
const auditResult = {
detectedTags,
sourceS3Folder: `${bucketName}/${prefix}`,
fullAuditRef: 'na',
fullAuditRef: '',
finalUrl: baseURL,
projectedTrafficLost,
projectedTrafficValue,
};

return {
auditResult,
fullAuditRef: baseURL,
Expand Down
11 changes: 9 additions & 2 deletions src/metatags/opportunityHandler.js
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ export default async function convertToOpportunity(auditUrl, auditData, context)
throw new Error(`Failed to fetch opportunities for siteId ${auditData.siteId}: ${e.message}`);
}

const { detectedTags, projectedTrafficLost, projectedTrafficValue } = auditData.auditResult;
try {
if (!metatagsOppty) {
const opportunityData = {
Expand All @@ -180,19 +181,25 @@ export default async function convertToOpportunity(auditUrl, auditData, context)
],
},
tags: ['Traffic acquisition'],
data: {
projectedTrafficLost,
projectedTrafficValue,
},
};
metatagsOppty = await Opportunity.create(opportunityData);
log.debug('Meta-tags Opportunity created');
} else {
metatagsOppty.setAuditId(auditData.siteId);
metatagsOppty.setData({
projectedTrafficLost,
projectedTrafficValue,
});
await metatagsOppty.save();
}
} catch (e) {
log.error(`Creating meta-tags opportunity for siteId ${auditData.siteId} failed with error: ${e.message}`, e);
throw new Error(`Failed to create meta-tags opportunity for siteId ${auditData.siteId}: ${e.message}`);
}

const { detectedTags } = auditData.auditResult;
const suggestions = [];
// Generate suggestions data to be inserted in meta-tags opportunity suggestions
Object.keys(detectedTags).forEach((endpoint) => {
Expand Down
43 changes: 43 additions & 0 deletions src/support/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import { ListObjectsV2Command } from '@aws-sdk/client-s3';
import { getObjectFromKey } from '../utils/s3-utils.js';

URI.preventInvalidHostname = true;
const DEFAULT_CPC_VALUE = 1; // $1

// weekly pageview threshold to eliminate urls with lack of samples

Expand Down Expand Up @@ -234,6 +235,48 @@ export const extractLinksFromHeader = (data, baseUrl, log) => {
return links;
};

/**
 * Derives a cost-per-click (CPC) value for a site from its stored organic
 * traffic metrics, following
 * https://wiki.corp.adobe.com/pages/viewpage.action?spaceKey=AEMSites&title=Success+Studio+Projected+Business+Impact+Metrics#SuccessStudioProjectedBusinessImpactMetrics-IdentifyingCPCvalueforadomain
 *
 * Reads `metrics/<siteId>/ahrefs/organic-traffic.json` from the importer
 * bucket and uses the last entry of that array: `cost / value / 100`.
 * Falls back to `DEFAULT_CPC_VALUE` when the data is absent, empty, has a
 * falsy `cost` or `value`, or when reading it throws.
 *
 * @param {object} context - Must provide `env.S3_IMPORTER_BUCKET_NAME`, `s3Client` and `log`.
 * @param {string} siteId - Id of the site whose metrics are read.
 * @returns {Promise<number>} CPC value.
 * @throws {Error} When the bucket name, S3 client, logger or siteId is missing.
 */
export async function calculateCPCValue(context, siteId) {
  // Preconditions are evaluated lazily and in order; the first failure is reported.
  const preconditions = [
    [() => context?.env?.S3_IMPORTER_BUCKET_NAME, 'S3 importer bucket name is required'],
    [() => context.s3Client, 'S3 client is required'],
    [() => context.log, 'Logger is required'],
    [() => siteId, 'SiteId is required'],
  ];
  const unmet = preconditions.find(([read]) => !read());
  if (unmet) {
    throw new Error(unmet[1]);
  }

  const { s3Client, log, env } = context;
  const objectKey = `metrics/${siteId}/ahrefs/organic-traffic.json`;
  try {
    const trafficHistory = await getObjectFromKey(
      s3Client,
      env.S3_IMPORTER_BUCKET_NAME,
      objectKey,
      log,
    );
    const hasEntries = Array.isArray(trafficHistory) && trafficHistory.length > 0;
    if (!hasEntries) {
      log.warn(`Organic traffic data not available for ${siteId}. Using Default CPC value.`);
      return DEFAULT_CPC_VALUE;
    }
    const [latest] = trafficHistory.slice(-1);
    const { cost, value } = latest;
    if (cost && value) {
      // dividing by 100 for cents to dollar conversion
      return cost / value / 100;
    }
    log.warn(`Invalid organic traffic data present for ${siteId} - cost:${cost} value:${value}, Using Default CPC value.`);
    return DEFAULT_CPC_VALUE;
  } catch (err) {
    log.error(`Error fetching organic traffic data for site ${siteId}. Using Default CPC value.`, err);
    return DEFAULT_CPC_VALUE;
  }
}

export const getScrapedDataForSiteId = async (site, context) => {
const { s3Client, env, log } = context;
const siteId = site.getId();
Expand Down
11 changes: 6 additions & 5 deletions src/utils/s3-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ export async function getObjectKeysUsingPrefix(
let continuationToken = null;
if (!s3Client || !bucketName || !prefix) {
log.error(
`Invalid input parameters: ensure s3Client, bucketName:${bucketName}, and prefix:${prefix} are provided.`,
`Invalid input parameters in getObjectKeysUsingPrefix: ensure s3Client, bucketName:${bucketName}, and prefix:${prefix} are provided.`,
);
throw new Error(
'Invalid input parameters: ensure s3Client, bucketName, and prefix are provided.',
'Invalid input parameters in getObjectKeysUsingPrefix: ensure s3Client, bucketName, and prefix are provided.',
);
}
try {
Expand All @@ -41,7 +41,9 @@ export async function getObjectKeysUsingPrefix(
// eslint-disable-next-line no-await-in-loop
const data = await s3Client.send(new ListObjectsV2Command(params));
data?.Contents?.forEach((obj) => {
objectKeys.push(obj.Key);
if (obj.Key?.endsWith('scrape.json')) {
objectKeys.push(obj.Key);
}
});
continuationToken = data?.NextContinuationToken;
} while (continuationToken);
Expand Down Expand Up @@ -72,7 +74,7 @@ export async function getObjectKeysUsingPrefix(
export async function getObjectFromKey(s3Client, bucketName, key, log) {
if (!s3Client || !bucketName || !key) {
log.error(
'Invalid input parameters: ensure s3Client, bucketName, and key are provided.',
'Invalid input parameters in getObjectFromKey: ensure s3Client, bucketName, and key are provided.',
);
return null;
}
Expand All @@ -93,7 +95,6 @@ export async function getObjectFromKey(s3Client, bucketName, key, log) {
return null;
}
}

// Always return body for non-JSON content types
return body;
} catch (err) {
Expand Down
Loading