Skip to content
This repository was archived by the owner on Apr 18, 2024. It is now read-only.

Commit 5bf915e

Browse files
bmartelhlomzik
andauthored
fix: LSDV-4747: Selecting the end character on a Paragraph phrase to the very start of other phrases only includes the first phrase selection (#1257)
* adding another test case to paragraphs for selection issues * Look for correct text node when selection ends on div But range stored in region should also be fixed and not have 0 as the endOffset * changing the test to select a more easily reproducible case manually * fix highlighting of spans * adding test for loading paragraph ranges correctly, still need to get this passing but now closer to a solution * making test for initializing a paragraph region pass * for some reason using Dialogue as the default dev config causes timeout errors * fix smoke test for audio paragraphs * making the result text function simplified to use existing phraseElements * removing unused function * adding another test case, and making a slight adjustment to fix * updating codept timeout as more tests are pushing the threshold --------- Co-authored-by: hlomzik <[email protected]>
1 parent b00b0a3 commit 5bf915e

File tree

5 files changed

+203
-14
lines changed

5 files changed

+203
-14
lines changed

e2e/codecept.conf.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ const fs = require('fs');
77
const FRAGMENTS_PATH = './fragments/';
88

99
module.exports.config = {
10-
timeout: 60 * 30, // Time out after 30 minutes
10+
timeout: 60 * 40, // Time out after 40 minutes
1111
tests: './tests/**/*.test.js',
1212
output: './output',
1313
helpers: {

e2e/tests/paragraphs-filter.test.js

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,28 @@ Feature('Paragraphs filter');
55

66
const AUDIO = 'https://htx-misc.s3.amazonaws.com/opensource/label-studio/examples/audio/barradeen-emotional.mp3';
77

8+
const ANNOTATIONS = [
9+
{
10+
'result': [
11+
{
12+
'id':'ryzr4QdL93',
13+
'from_name':'ner',
14+
'to_name':'text',
15+
'source':'$dialogue',
16+
'type':'paragraphlabels',
17+
'value':{
18+
'start':'2',
19+
'end':'4',
20+
'startOffset':0,
21+
'endOffset':134,
22+
'paragraphlabels': ['Important Stuff'],
23+
'text': 'Uncomfortable silences. Why do we feel its necessary to yak about bullshit in order to be comfortable?I dont know. Thats a good question.Thats when you know you found somebody really special. When you can just shut the fuck up for a minute, and comfortably share silence.',
24+
},
25+
},
26+
],
27+
},
28+
];
29+
830
const DATA = {
931
audio: AUDIO,
1032
dialogue: [
@@ -36,6 +58,7 @@ const DATA = {
3658
},
3759
],
3860
};
61+
3962
const CONFIG = `
4063
<View>
4164
<ParagraphLabels name="ner" toName="text">
@@ -461,3 +484,117 @@ Scenario(
461484
}
462485
},
463486
);
487+
488+
Scenario('Selecting the end character on a paragraph phrase to the very start of other phrases includes all selected phrases', async ({ I, LabelStudio, AtSidebar, AtParagraphs, AtLabels }) => {
489+
const params = {
490+
data: DATA,
491+
config: CONFIG,
492+
};
493+
494+
I.amOnPage('/');
495+
496+
LabelStudio.setFeatureFlags(FEATURE_FLAGS);
497+
LabelStudio.init(params);
498+
AtSidebar.seeRegions(0);
499+
500+
I.say('Select 2 regions in the consecutive phrases');
501+
502+
AtLabels.clickLabel('Random talk');
503+
AtParagraphs.setSelection(
504+
AtParagraphs.locateText('Dont you hate that?'),
505+
18,
506+
AtParagraphs.locateText('Uncomfortable silences. Why do we feel its necessary to yak about bullshit in order to be comfortable?'),
507+
0,
508+
);
509+
510+
AtSidebar.seeRegions(1);
511+
512+
const result = await LabelStudio.serialize();
513+
514+
assert.deepStrictEqual(
515+
omitBy(result[0].value, (v, key) => key === 'paragraphlabels'),
516+
{
517+
start: '0',
518+
end: '1',
519+
startOffset: 18,
520+
endOffset: 10,
521+
text: '?\n\nHate what?',
522+
},
523+
);
524+
});
525+
526+
Scenario('Selecting the end character on a paragraph phrase to the very start of other phrases includes all selected phrases except the very last one', async ({ I, LabelStudio, AtSidebar, AtParagraphs, AtLabels }) => {
527+
const params = {
528+
data: {
529+
...DATA,
530+
dialogue: DATA.dialogue.map(d => [d, { ...d, text: `${d.text}2` }]).flat(),
531+
},
532+
config: CONFIG,
533+
};
534+
535+
I.amOnPage('/');
536+
537+
LabelStudio.setFeatureFlags(FEATURE_FLAGS);
538+
LabelStudio.init(params);
539+
AtSidebar.seeRegions(0);
540+
541+
542+
I.say('Select 2 regions in the consecutive phrases of the one person');
543+
AtParagraphs.clickFilter('Vincent Vega');
544+
AtLabels.clickLabel('Random talk');
545+
AtParagraphs.setSelection(
546+
AtParagraphs.locateText('Hate what?2'),
547+
10,
548+
AtParagraphs.locateText('I dont know. Thats a good question.2'),
549+
0,
550+
);
551+
552+
AtSidebar.seeRegions(2);
553+
554+
const result = await LabelStudio.serialize();
555+
556+
assert.deepStrictEqual(
557+
omitBy(result[0].value, (v, key) => key === 'paragraphlabels'),
558+
{
559+
start: '3',
560+
end: '3',
561+
startOffset: 10,
562+
endOffset: 11,
563+
text: '2',
564+
},
565+
);
566+
assert.deepStrictEqual(
567+
omitBy(result[1].value, (v, key) => key === 'paragraphlabels'),
568+
{
569+
start: '6',
570+
end: '6',
571+
startOffset: 0,
572+
endOffset: 35,
573+
text: 'I dont know. Thats a good question.',
574+
},
575+
);
576+
});
577+
578+
Scenario('Initializing a paragraph region range should not include author names in text', async ({ I, LabelStudio, AtSidebar }) => {
579+
const params = {
580+
data: DATA,
581+
annotations: ANNOTATIONS,
582+
config: CONFIG,
583+
};
584+
585+
I.amOnPage('/');
586+
LabelStudio.setFeatureFlags(FEATURE_FLAGS);
587+
588+
const [{ result : [region] }] = ANNOTATIONS;
589+
const { paragraphlabels: _paragraphlabels, ...value } = region.value;
590+
591+
LabelStudio.init(params);
592+
AtSidebar.seeRegions(1);
593+
594+
const result = await LabelStudio.serialize();
595+
596+
assert.deepStrictEqual(
597+
omitBy(result[0].value, (v, key) => key === 'paragraphlabels'),
598+
value,
599+
);
600+
});

src/tags/object/Paragraphs/Paragraphs.js

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,33 @@ class HtxParagraphsView extends Component {
9393
for (i = 0; i < selection.rangeCount; i++) {
9494
const r = selection.getRangeAt(i);
9595

96-
if (r.endContainer.nodeName === 'DIV') {
97-
r.setEnd(r.startContainer, r.startContainer.length);
96+
if (r.endContainer.nodeType !== Node.TEXT_NODE) {
97+
// offsets work differently for nodes and texts, so we have to find #text.
98+
// lastChild because most probably this is div of the whole paragraph,
99+
// and it has author div and phrase div.
100+
const el = this.getPhraseElement(r.endContainer.lastChild);
101+
let textNode = el;
102+
103+
while (textNode && textNode.nodeType !== Node.TEXT_NODE) {
104+
textNode = textNode.firstChild;
105+
}
106+
107+
// most probably this div is out of Paragraphs
108+
// @todo maybe select till the end of Paragraphs?
109+
if (!textNode) continue;
110+
111+
r.setEnd(textNode, 0);
98112
}
113+
99114
if (r.collapsed || /^\s*$/.test(r.toString())) continue;
100115

101116
try {
102117
splitBoundaries(r);
103118
const [startOffset, , start, originalStart] = this.getOffsetInPhraseElement(r.startContainer, r.startOffset);
104-
const [endOffset, , end, originalEnd] = this.getOffsetInPhraseElement(r.endContainer, r.endOffset, false);
119+
const [endOffset, , end, _originalEnd] = this.getOffsetInPhraseElement(r.endContainer, r.endOffset, false);
120+
121+
// if this shifts backwards, we need to take the lesser index.
122+
const originalEnd = Math.min(end, _originalEnd);
105123

106124
if (isFF(FF_DEV_2918)) {
107125
const visibleIndexes = item._value.reduce((visibleIndexes, v, idx) => {
@@ -286,14 +304,35 @@ class HtxParagraphsView extends Component {
286304
}
287305
}
288306

307+
/**
308+
* Generates a textual representation of the current selection range.
309+
*
310+
* @param {number} start
311+
* @param {number} end
312+
* @param {number} startOffset
313+
* @param {number} endOffset
314+
* @returns {string}
315+
*/
316+
_getResultText(start, end, startOffset, endOffset) {
317+
const phrases = this.phraseElements;
318+
319+
if (start === end) return phrases[start].innerText.slice(startOffset, endOffset);
320+
321+
return [
322+
phrases[start].innerText.slice(startOffset),
323+
phrases.slice(start + 1, end).map(phrase => phrase.innerText),
324+
phrases[end].innerText.slice(0, endOffset),
325+
].flat().join('');
326+
}
327+
289328
_handleUpdate() {
290329
const root = this.myRef.current;
291330
const { item } = this.props;
292331

293332
// wait until text is loaded
294333
if (!item._value) return;
295334

296-
item.regs.forEach(function(r, i) {
335+
item.regs.forEach((r, i) => {
297336
// spans can be totally missed if this is app init or undo/redo
298337
// or they can be disconnected from DOM on annotations switching
299338
// so we have to recreate them from regions data
@@ -304,6 +343,7 @@ class HtxParagraphsView extends Component {
304343
const range = document.createRange();
305344
const startNode = phrases[r.start].getElementsByClassName(item.layoutClasses.text)[0];
306345
const endNode = phrases[r.end].getElementsByClassName(item.layoutClasses.text)[0];
346+
307347
let { startOffset, endOffset } = r;
308348

309349
range.setStart(...findNodeAt(startNode, startOffset));
@@ -333,7 +373,7 @@ class HtxParagraphsView extends Component {
333373
r.fixOffsets(startOffset, endOffset);
334374
}
335375
} else if (!r.text && range.toString()) {
336-
r.setText(range.toString());
376+
r.setText(this._getResultText(+r.start, +r.end, startOffset, endOffset));
337377
}
338378

339379
splitBoundaries(range);

src/tags/object/Paragraphs/Phrases.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ export const Phrases = observer(({ item }) => {
2020
if (getRoot(item).settings.showLineNumbers) classNames.push(styles.numbered);
2121

2222
return (
23-
<div key={`${item.name}-${idx}`} className={classNames.join(' ')} style={style.phrase}>
23+
<div key={`${item.name}-${idx}`} data-testid={`phrase:${idx}`} className={classNames.join(' ')} style={style.phrase}>
2424
{isContentVisible && withAudio && !isNaN(v.start) && (
2525
<Button
2626
type="text"
@@ -29,11 +29,11 @@ export const Phrases = observer(({ item }) => {
2929
onClick={() => item.play(idx)}
3030
/>
3131
)}
32-
<span className={cls.name}>{v[item.namekey]}</span>
32+
<span className={cls.name} data-skip-node="true">{v[item.namekey]}</span>
3333
<span className={cls.text}>{v[item.textkey]}</span>
3434
</div>
3535
);
3636
});
3737

3838
return val;
39-
});
39+
});

src/utils/html.js

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,16 @@ function getNextNode(node) {
9292
}
9393
}
9494

95-
function getNodesInRange(range) {
95+
export function isValidTreeNode(node, commonAncestor) {
96+
while(node) {
97+
if (commonAncestor && node === commonAncestor) return true;
98+
if (node.nodeType === Node.ELEMENT_NODE && node.dataset.skipNode === 'true') return false;
99+
node = node.parentNode;
100+
}
101+
return true;
102+
}
103+
104+
export function getNodesInRange(range) {
96105
const start = range.startContainer;
97106
const end = range.endContainer;
98107
const commonAncestor = range.commonAncestorContainer;
@@ -101,20 +110,24 @@ function getNodesInRange(range) {
101110

102111
// walk parent nodes from start to common ancestor
103112
for (node = start.parentNode; node; node = node.parentNode) {
104-
nodes.push(node);
113+
if (isValidTreeNode(node, commonAncestor)) nodes.push(node);
105114
if (node === commonAncestor) break;
106115
}
107116
nodes.reverse();
108117

109118
// walk children and siblings from start until end is found
110119
for (node = start; node; node = getNextNode(node)) {
111-
nodes.push(node);
120+
if (isValidTreeNode(node, commonAncestor)) nodes.push(node);
112121
if (node === end) break;
113122
}
114123

115124
return nodes;
116125
}
117126

127+
export function getTextNodesInRange(range) {
128+
return getNodesInRange(range).filter(n => isTextNode(n));
129+
}
130+
118131
function documentReverse(node) {
119132
if (node.lastChild) return node.lastChild;
120133

@@ -192,8 +205,7 @@ function highlightRange(normedRange, cssClass, cssStyle) {
192205
cssClass = 'htx-annotation';
193206
}
194207

195-
const allNodes = getNodesInRange(normedRange._range);
196-
const textNodes = allNodes.filter(n => isTextNode(n));
208+
const textNodes = getTextNodesInRange(normedRange._range);
197209

198210
const white = /^\s*$/;
199211

0 commit comments

Comments
 (0)