Skip to content

Commit 6432252

Browse files
committed
fix(browser): make attachment clicks trusted
1 parent 0ae9766 commit 6432252

5 files changed

Lines changed: 183 additions & 74 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
### Fixed
1010
- Browser: avoid stray attachment removal clicks while still detecting stale chips, and allow completed uploads even if send stays disabled. Original PR #56 by Alex Naidis (@TheCrazyLex) — thank you!
1111
- Browser: dismiss blocking modals when a custom ChatGPT project URL is missing, and harden attachment uploads (force input/change events; retry via DataTransfer; treat “file selected” as insufficient unless the composer shows attachment UI).
12+
- Browser: prefer a trusted (CDP) click on the composer “+” button so attachment uploads work even when ChatGPT ignores synthetic clicks.
1213

1314
## 0.8.2 — 2025-12-30
1415

src/browser/actions/attachments.ts

Lines changed: 131 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ import { logDomFailure } from '../domDebug.js';
66
import { transferAttachmentViaDataTransfer } from './attachmentDataTransfer.js';
77

88
export async function uploadAttachmentFile(
9-
deps: { runtime: ChromeClient['Runtime']; dom?: ChromeClient['DOM'] },
9+
deps: { runtime: ChromeClient['Runtime']; dom?: ChromeClient['DOM']; input?: ChromeClient['Input'] },
1010
attachment: BrowserAttachment,
1111
logger: BrowserLogger,
1212
options?: { expectedCount?: number },
1313
): Promise<boolean> {
14-
const { runtime, dom } = deps;
14+
const { runtime, dom, input } = deps;
1515
if (!dom) {
1616
throw new Error('DOM domain unavailable while uploading attachments.');
1717
}
@@ -294,49 +294,72 @@ export async function uploadAttachmentFile(
294294
};
295295

296296
// New ChatGPT UI hides the real file input behind a composer "+" menu; click it pre-emptively.
297-
await Promise.resolve(
298-
runtime.evaluate({
299-
expression: `(() => {
300-
const selectors = [
301-
'#composer-plus-btn',
302-
'button[data-testid="composer-plus-btn"]',
303-
'[data-testid*="plus"]',
304-
'button[aria-label*="add"]',
305-
'button[aria-label*="attachment"]',
306-
'button[aria-label*="file"]',
307-
];
308-
for (const selector of selectors) {
309-
const el = document.querySelector(selector);
310-
if (el instanceof HTMLElement) {
311-
el.click();
312-
return true;
297+
// Learned: synthetic `.click()` is sometimes ignored (isTrusted checks). Prefer a CDP mouse click when possible.
298+
const clickPlusTrusted = async (): Promise<boolean> => {
299+
if (!input || typeof input.dispatchMouseEvent !== 'function') return false;
300+
const locate = await runtime
301+
.evaluate({
302+
expression: `(() => {
303+
const selectors = [
304+
'#composer-plus-btn',
305+
'button[data-testid="composer-plus-btn"]',
306+
'[data-testid*="plus"]',
307+
'button[aria-label*="add"]',
308+
'button[aria-label*="attachment"]',
309+
'button[aria-label*="file"]',
310+
];
311+
for (const selector of selectors) {
312+
const el = document.querySelector(selector);
313+
if (!(el instanceof HTMLElement)) continue;
314+
const rect = el.getBoundingClientRect();
315+
if (rect.width <= 0 || rect.height <= 0) continue;
316+
el.scrollIntoView({ block: 'center', inline: 'center' });
317+
const nextRect = el.getBoundingClientRect();
318+
return { ok: true, x: nextRect.left + nextRect.width / 2, y: nextRect.top + nextRect.height / 2 };
313319
}
314-
}
315-
return false;
316-
})()`,
317-
returnByValue: true,
318-
}),
319-
).catch(() => undefined);
320-
321-
await delay(250);
320+
return { ok: false };
321+
})()`,
322+
returnByValue: true,
323+
})
324+
.then((res) => res?.result?.value as { ok?: boolean; x?: number; y?: number } | undefined)
325+
.catch(() => undefined);
326+
if (!locate?.ok || typeof locate.x !== 'number' || typeof locate.y !== 'number') return false;
327+
const x = locate.x;
328+
const y = locate.y;
329+
await input.dispatchMouseEvent({ type: 'mouseMoved', x, y });
330+
await input.dispatchMouseEvent({ type: 'mousePressed', x, y, button: 'left', clickCount: 1 });
331+
await input.dispatchMouseEvent({ type: 'mouseReleased', x, y, button: 'left', clickCount: 1 });
332+
return true;
333+
};
322334

323-
// Helper to click the upload menu item (if present) to reveal the real attachment input.
324-
await Promise.resolve(
325-
runtime.evaluate({
326-
expression: `(() => {
327-
const menuItems = Array.from(document.querySelectorAll('[data-testid*="upload"],[data-testid*="attachment"], [role="menuitem"], [data-radix-collection-item]'));
328-
for (const el of menuItems) {
329-
const text = (el.textContent || '').toLowerCase();
330-
const tid = el.getAttribute?.('data-testid')?.toLowerCase?.() || '';
331-
if (tid.includes('upload') || tid.includes('attachment') || text.includes('upload') || text.includes('file')) {
332-
if (el instanceof HTMLElement) { el.click(); return true; }
335+
const clickedTrusted = await clickPlusTrusted().catch(() => false);
336+
if (!clickedTrusted) {
337+
await Promise.resolve(
338+
runtime.evaluate({
339+
expression: `(() => {
340+
const selectors = [
341+
'#composer-plus-btn',
342+
'button[data-testid="composer-plus-btn"]',
343+
'[data-testid*="plus"]',
344+
'button[aria-label*="add"]',
345+
'button[aria-label*="attachment"]',
346+
'button[aria-label*="file"]',
347+
];
348+
for (const selector of selectors) {
349+
const el = document.querySelector(selector);
350+
if (el instanceof HTMLElement) {
351+
el.click();
352+
return true;
353+
}
333354
}
334-
}
335-
return false;
336-
})()`,
337-
returnByValue: true,
338-
}),
339-
).catch(() => undefined);
355+
return false;
356+
})()`,
357+
returnByValue: true,
358+
}),
359+
).catch(() => undefined);
360+
}
361+
362+
await delay(350);
340363

341364
const normalizeForMatch = (value: string): string =>
342365
String(value || '')
@@ -572,23 +595,25 @@ export async function uploadAttachmentFile(
572595
}
573596
574597
// Mark candidates with stable indices so we can select them via DOM.querySelector.
598+
// Learned: ChatGPT sometimes renders a zero-sized file input that does *not* trigger uploads;
599+
// keep it as a fallback, but strongly prefer visible (even sr-only 1x1) inputs.
600+
const localSet = new Set(localInputs);
575601
let idx = 0;
576-
let candidates = inputs.map((el) => {
602+
const candidates = inputs.map((el) => {
577603
const accept = el.getAttribute('accept') || '';
578604
const imageOnly = acceptIsImageOnly(accept);
605+
const rect = el instanceof HTMLElement ? el.getBoundingClientRect() : { width: 0, height: 0 };
606+
const visible = rect.width > 0 && rect.height > 0;
607+
const local = localSet.has(el);
579608
const score =
580609
(el.hasAttribute('multiple') ? 100 : 0) +
581-
(!imageOnly ? 20 : isImageAttachment ? 15 : -500);
610+
(local ? 40 : 0) +
611+
(visible ? 30 : -200) +
612+
(!imageOnly ? 30 : isImageAttachment ? 20 : 5);
582613
el.setAttribute('data-oracle-upload-candidate', 'true');
583614
el.setAttribute('data-oracle-upload-idx', String(idx));
584-
return { idx: idx++, score, imageOnly };
615+
return { idx: idx++, score, imageOnly, visible, local };
585616
});
586-
if (!isImageAttachment) {
587-
const nonImage = candidates.filter((candidate) => !candidate.imageOnly);
588-
if (nonImage.length > 0) {
589-
candidates = nonImage;
590-
}
591-
}
592617
593618
// Prefer higher scores first.
594619
candidates.sort((a, b) => b.score - a.score);
@@ -1739,11 +1764,59 @@ export async function waitForAttachmentVisible(
17391764
}
17401765
}
17411766
1742-
const composerRoot =
1743-
document.querySelector('[data-testid*="composer"]') || document.querySelector('form') || document.body;
1744-
const attachmentSelectors = ['[data-testid*="attachment"]','[data-testid*="chip"]','[data-testid*="upload"]','[data-testid*="file"]'];
1745-
const attachmentMatch = attachmentSelectors.some((selector) =>
1746-
Array.from(document.querySelectorAll(selector)).some(matchNode),
1767+
const promptSelectors = ${JSON.stringify(INPUT_SELECTORS)};
1768+
const sendSelectors = ${JSON.stringify(SEND_BUTTON_SELECTORS)};
1769+
const findPromptNode = () => {
1770+
for (const selector of promptSelectors) {
1771+
const nodes = Array.from(document.querySelectorAll(selector));
1772+
for (const node of nodes) {
1773+
if (!(node instanceof HTMLElement)) continue;
1774+
const rect = node.getBoundingClientRect();
1775+
if (rect.width > 0 && rect.height > 0) return node;
1776+
}
1777+
}
1778+
for (const selector of promptSelectors) {
1779+
const node = document.querySelector(selector);
1780+
if (node) return node;
1781+
}
1782+
return null;
1783+
};
1784+
const attachmentSelectors = [
1785+
'input[type="file"]',
1786+
'[data-testid*="attachment"]',
1787+
'[data-testid*="chip"]',
1788+
'[data-testid*="upload"]',
1789+
'[data-testid*="file"]',
1790+
'[aria-label*="Remove"]',
1791+
'[aria-label*="remove"]',
1792+
];
1793+
const locateComposerRoot = () => {
1794+
const promptNode = findPromptNode();
1795+
if (promptNode) {
1796+
const initial =
1797+
promptNode.closest('[data-testid*="composer"]') ??
1798+
promptNode.closest('form') ??
1799+
promptNode.parentElement ??
1800+
document.body;
1801+
let current = initial;
1802+
let fallback = initial;
1803+
while (current && current !== document.body) {
1804+
const hasSend = sendSelectors.some((selector) => current.querySelector(selector));
1805+
if (hasSend) {
1806+
fallback = current;
1807+
const hasAttachment = attachmentSelectors.some((selector) => current.querySelector(selector));
1808+
if (hasAttachment) return current;
1809+
}
1810+
current = current.parentElement;
1811+
}
1812+
return fallback ?? initial;
1813+
}
1814+
return document.querySelector('form') ?? document.body;
1815+
};
1816+
const composerRoot = locateComposerRoot() ?? document.body;
1817+
1818+
const attachmentMatch = ['[data-testid*="attachment"]','[data-testid*="chip"]','[data-testid*="upload"]','[data-testid*="file"]'].some((selector) =>
1819+
Array.from(composerRoot.querySelectorAll(selector)).some(matchNode),
17471820
);
17481821
if (attachmentMatch) {
17491822
return { found: true, source: 'attachments' };
@@ -1763,30 +1836,15 @@ export async function waitForAttachmentVisible(
17631836
return { found: true, source: 'remove-button' };
17641837
}
17651838
1766-
const cardTexts = Array.from(document.querySelectorAll('[aria-label*="Remove"]')).map((btn) =>
1839+
const cardTexts = Array.from(composerRoot.querySelectorAll('[aria-label*="Remove"]')).map((btn) =>
17671840
btn?.parentElement?.parentElement?.innerText?.toLowerCase?.() ?? '',
17681841
);
17691842
if (cardTexts.some((text) => text.includes(normalized) || (normalizedNoExt.length >= 6 && text.includes(normalizedNoExt)))) {
17701843
return { found: true, source: 'attachment-cards' };
17711844
}
17721845
17731846
const countRegex = /(?:^|\\b)(\\d+)\\s+(?:files?|attachments?)\\b/;
1774-
const fileCountNodes = (() => {
1775-
const nodes = [];
1776-
const seen = new Set();
1777-
const add = (node) => {
1778-
if (!node || seen.has(node)) return;
1779-
seen.add(node);
1780-
nodes.push(node);
1781-
};
1782-
const root = composerRoot;
1783-
const localNodes = root ? Array.from(root.querySelectorAll('button,span,div,[aria-label],[title]')) : [];
1784-
for (const node of localNodes) add(node);
1785-
for (const node of Array.from(document.querySelectorAll('button,span,div,[aria-label],[title]'))) {
1786-
add(node);
1787-
}
1788-
return nodes;
1789-
})();
1847+
const fileCountNodes = Array.from(composerRoot.querySelectorAll('button,span,div,[aria-label],[title]'));
17901848
let fileCount = 0;
17911849
for (const node of fileCountNodes) {
17921850
if (!(node instanceof HTMLElement)) continue;

src/browser/actions/navigation.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,48 @@ import {
77
import { delay } from '../utils.js';
88
import { logDomFailure } from '../domDebug.js';
99

10+
/**
 * Automatically accepts JavaScript dialogs (`alert`, `confirm`, `prompt`, `beforeunload`)
 * announced through the Page domain's `javascriptDialogOpening` event, so that a stray
 * dialog cannot block the automated browser session.
 *
 * @param Page - CDP Page domain handle. It is probed at runtime: when it lacks `on` or
 *   `handleJavaScriptDialog`, nothing is installed and a no-op disposer is returned.
 * @param logger - Receives a line for every dismissed dialog and for every failed dismissal.
 * @returns A disposer that detaches the listener. It never throws and is safe to call
 *   more than once.
 */
export function installJavaScriptDialogAutoDismissal(
  Page: ChromeClient['Page'],
  logger: BrowserLogger,
): () => void {
  type DialogEvent = { type?: string; message?: string };
  type DialogListener = (params: DialogEvent) => void;
  const EVENT_NAME = 'javascriptDialogOpening';

  // The Page typing does not expose the event-emitter style surface, so probe it structurally.
  const pageAny = Page as unknown as {
    // NOTE(review): some CDP clients (chrome-remote-interface, presumably what ChromeClient
    // wraps — confirm) return an unsubscribe function from `on`; hence the `unknown` return.
    on?: (event: string, listener: DialogListener) => unknown;
    off?: (event: string, listener: DialogListener) => void;
    removeListener?: (event: string, listener: DialogListener) => void;
    handleJavaScriptDialog?: (params: { accept: boolean; promptText?: string }) => Promise<void>;
  };

  if (typeof pageAny.on !== 'function' || typeof pageAny.handleJavaScriptDialog !== 'function') {
    return () => {};
  }

  const handler = async (params: DialogEvent) => {
    const type = typeof params?.type === 'string' ? params.type : 'unknown';
    const message = typeof params?.message === 'string' ? params.message : '';
    // Cap the logged message so a huge dialog text cannot flood the log.
    logger(`[nav] dismissing JS dialog (${type})${message ? `: ${message.slice(0, 140)}` : ''}`);
    try {
      await pageAny.handleJavaScriptDialog?.({ accept: true, promptText: '' });
    } catch (error) {
      // Best effort: a failed dismissal is reported but must not surface as an unhandled rejection.
      const msg = error instanceof Error ? error.message : String(error);
      logger(`[nav] failed to dismiss JS dialog: ${msg}`);
    }
  };

  // Keep whatever `on` returns: when it is a function it is treated as the unsubscribe handle.
  const registration: unknown = pageAny.on(EVENT_NAME, handler);
  let disposed = false;

  return () => {
    if (disposed) return;
    disposed = true;

    // Try each detach strategy that actually exists, in order, and stop at the first one
    // that completes. Checking for existence matters: an optional call on a missing `off`
    // would silently do nothing and leave the listener attached.
    if (typeof registration === 'function') {
      try {
        registration();
        return;
      } catch {
        // fall through to the next strategy
      }
    }
    if (typeof pageAny.off === 'function') {
      try {
        pageAny.off(EVENT_NAME, handler);
        return;
      } catch {
        // fall through to the next strategy
      }
    }
    if (typeof pageAny.removeListener === 'function') {
      try {
        pageAny.removeListener(EVENT_NAME, handler);
      } catch {
        // ignore — cleanup is best effort
      }
    }
  };
}
51+
1052
export async function navigateToChatGPT(
1153
Page: ChromeClient['Page'],
1254
Runtime: ChromeClient['Runtime'],

src/browser/index.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import {
1919
ensureNotBlocked,
2020
ensureLoggedIn,
2121
ensurePromptReady,
22+
installJavaScriptDialogAutoDismissal,
2223
ensureModelSelection,
2324
submitPrompt,
2425
clearPromptComposer,
@@ -179,6 +180,7 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise<Browse
179180
let runStatus: 'attempted' | 'complete' = 'attempted';
180181
let connectionClosedUnexpectedly = false;
181182
let stopThinkingMonitor: (() => void) | null = null;
183+
let removeDialogHandler: (() => void) | null = null;
182184
let appliedCookies = 0;
183185

184186
try {
@@ -211,6 +213,7 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise<Browse
211213
domainEnablers.push(DOM.enable());
212214
}
213215
await Promise.all(domainEnablers);
216+
removeDialogHandler = installJavaScriptDialogAutoDismissal(Page, logger);
214217
if (!manualLogin) {
215218
await Network.clearBrowserCookies();
216219
}
@@ -429,7 +432,7 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise<Browse
429432
const attachment = submissionAttachments[attachmentIndex];
430433
logger(`Uploading attachment: ${attachment.displayPath}`);
431434
const uiConfirmed = await uploadAttachmentFile(
432-
{ runtime: Runtime, dom: DOM },
435+
{ runtime: Runtime, dom: DOM, input: Input },
433436
attachment,
434437
logger,
435438
{ expectedCount: attachmentIndex + 1 },
@@ -756,6 +759,7 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise<Browse
756759
} catch {
757760
// ignore
758761
}
762+
removeDialogHandler?.();
759763
removeTerminationHooks?.();
760764
if (!effectiveKeepBrowser) {
761765
if (!connectionClosedUnexpectedly) {
@@ -967,6 +971,7 @@ async function runRemoteBrowserMode(
967971
let answerHtml = '';
968972
let connectionClosedUnexpectedly = false;
969973
let stopThinkingMonitor: (() => void) | null = null;
974+
let removeDialogHandler: (() => void) | null = null;
970975

971976
try {
972977
const connection = await connectToRemoteChrome(host, port, logger, config.url);
@@ -984,6 +989,7 @@ async function runRemoteBrowserMode(
984989
domainEnablers.push(DOM.enable());
985990
}
986991
await Promise.all(domainEnablers);
992+
removeDialogHandler = installJavaScriptDialogAutoDismissal(Page, logger);
987993

988994
// Skip cookie sync for remote Chrome - it already has cookies
989995
logger('Skipping cookie sync for remote Chrome (using existing session)');
@@ -1293,6 +1299,7 @@ async function runRemoteBrowserMode(
12931299
} catch {
12941300
// ignore
12951301
}
1302+
removeDialogHandler?.();
12961303
await closeRemoteChromeTarget(host, port, remoteTargetId ?? undefined, logger);
12971304
// Don't kill remote Chrome - it's not ours to manage
12981305
const totalSeconds = (Date.now() - startedAt) / 1000;

src/browser/pageActions.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ export {
44
ensureNotBlocked,
55
ensureLoggedIn,
66
ensurePromptReady,
7+
installJavaScriptDialogAutoDismissal,
78
} from './actions/navigation.js';
89
export { ensureModelSelection } from './actions/modelSelection.js';
910
export { submitPrompt, clearPromptComposer } from './actions/promptComposer.js';

0 commit comments

Comments
 (0)