Skip to content

Commit 3d01665

Browse files
Merge pull request #119 from cucumber/bugfix/regex_regression
Bugfix/regex regression
2 parents be18223 + 4bbee18 commit 3d01665

3 files changed

Lines changed: 47 additions & 20 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
66
and this project adheres to [Semantic Versioning](http://semver.org/).
77

88
## [Unreleased]
9+
10+
### Fixed
11+
- Fixed a regression in how the Python language implementation detects regular expressions [#119](https://github.com/cucumber/language-service/pull/119)
12+
913
### Added
1014
- Added support for JavaScript - [#42](https://github.com/cucumber/language-service/issues/42), [#115](https://github.com/cucumber/language-service/pull/115), [#120](https://github.com/cucumber/language-service/pull/120)
1115

src/language/pythonLanguage.ts

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ export const pythonLanguage: Language = {
88
case 'string': {
99
return stringLiteral(node)
1010
}
11-
case 'concatednated_string': {
11+
case 'concatenated_string': {
1212
return stringLiteral(node)
1313
}
1414
case 'identifier': {
@@ -20,18 +20,15 @@ export const pythonLanguage: Language = {
2020
}
2121
},
2222
toParameterTypeRegExps(node: TreeSitterSyntaxNode) {
23-
return RegExp(cleanRegex(stringLiteral(node)))
23+
return RegExp(cleanRegExp(stringLiteral(node)))
2424
},
2525
toStepDefinitionExpression(node: TreeSitterSyntaxNode): StringOrRegExp {
26-
// this removes the head and tail apostrophes
27-
// remove python named capture groups.
26+
// This removes the head and tail apostrophes.
2827
// TODO: This should be temporary. Python supports
2928
// a wider array of regex features than JavaScript;
3029
// a single, consistent way of communicating regexes
3130
// across languages is necessary.
32-
return isRegex(node.text.slice(1, -1))
33-
? RegExp(cleanRegex(node.text.slice(1, -1).split('?P').join('')))
34-
: node.text.slice(1, -1)
31+
return toStringOrRegExp(node.text)
3532
},
3633
defineParameterTypeQueries: [
3734
`(call
@@ -56,13 +53,13 @@ export const pythonLanguage: Language = {
5653
defineStepDefinitionQueries: [
5754
`
5855
(decorated_definition
59-
(decorator
60-
(call
61-
function: (identifier) @method
62-
arguments: (argument_list (string) @expression)
63-
)
56+
(decorator
57+
(call
58+
function: (identifier) @method
59+
arguments: (argument_list (string) @expression)
6460
)
65-
(#match? @method "(given|when|then)")
61+
)
62+
(#match? @method "(given|when|then)")
6663
) @root
6764
`,
6865
],
@@ -86,25 +83,32 @@ export const pythonLanguage: Language = {
8683
# Please convert to use regular expressions, as Behave does not currently support Cucumber Expressions`,
8784
}
8885

89-
function cleanRegex(regexString: string) {
90-
const startsWith = regexString[0]
86+
function cleanRegExp(regExpString: string): string {
87+
const startsWith = regExpString[0]
9188
switch (startsWith) {
9289
case '/':
93-
return regexString.slice(1, -1)
90+
return regExpString.slice(1, -1)
9491
default:
95-
return regexString
92+
return regExpString
9693
}
9794
}
95+
export function toStringOrRegExp(step: string): StringOrRegExp {
96+
return isRegExp(step.slice(1, -1))
97+
? RegExp(cleanRegExp(step.slice(1, -1).split('?P').join('')))
98+
: step.slice(1, -1)
99+
}
98100

99-
function stringLiteral(node: TreeSitterSyntaxNode) {
101+
function stringLiteral(node: TreeSitterSyntaxNode): string {
100102
const isFString = node.text.startsWith('f')
101103
const cleanWord = isFString ? node.text.slice(1).slice(1, -1) : node.text.slice(1, -1)
102104
return cleanWord
103105
}
104106

105-
function isRegex(cleanWord: string) {
107+
export function isRegExp(cleanWord: string): boolean {
106108
const startsWithSlash = cleanWord.startsWith('/')
107109
const namedGroupMatch = /\?P/
110+
const specialCharsMatch = /\(|\)|\.|\*|\\|\|/
108111
const containsNamedGroups = namedGroupMatch.test(cleanWord)
109-
return startsWithSlash || containsNamedGroups
112+
const containsSpecialChars = specialCharsMatch.test(cleanWord)
113+
return startsWithSlash || containsNamedGroups || containsSpecialChars
110114
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import assert from 'assert'
2+
3+
import { toStringOrRegExp } from '../../src/language/pythonLanguage.js'
4+
5+
describe('pythonLanguage', () => {
6+
it('should identify and return regexes correctly', () => {
7+
// NOTE: these strings mimic the node text tree-sitter would produce (surrounding quotes included)
8+
const regexes = ['"Something (.*)"', '"Catch them digits \\d+"']
9+
regexes.forEach(function (regex) {
10+
assert(toStringOrRegExp(regex) instanceof RegExp)
11+
})
12+
})
13+
it('should identify normal strings and just return a string', () => {
14+
const nonregexes = ['"test"']
15+
nonregexes.forEach(function (nonregex) {
16+
assert(toStringOrRegExp(nonregex) == 'test')
17+
})
18+
})
19+
})

0 commit comments

Comments
 (0)