Skip to content

Commit 855cddd

Browse files
committed
Highlight regular expressions with tree-sitter-regex grammar
This grammar is bundled in NixOS by default and seems good enough for Java regular expressions. It is also maintained under the tree-sitter GitHub org, so it is "official". In order to properly identify the opening #" and closing " characters, we have to parse them twice: once with the Clojure grammar (in case the regex grammar is not available) and again with the regex grammar, as part of the actual pattern. This could be avoided if either the Clojure grammar captured a node for the inner contents of the regex literal, or treesit-range-settings supported some kind of offset argument like the Neovim tree-sitter mechanisms do. Should address issue #11.
1 parent d698528 commit 855cddd

File tree

2 files changed

+48
-3
lines changed

2 files changed

+48
-3
lines changed

clojure-ts-mode.el

+47-2
Original file line numberDiff line numberDiff line change
@@ -283,12 +283,48 @@ Only intended for use at development time.")
283283
:feature 'string
284284
:language 'clojure
285285
'((str_lit) @font-lock-string-face
286-
(regex_lit) @font-lock-string-face)
286+
(regex_lit) @font-lock-regex-face)
287287

288288
:feature 'regex
289289
:language 'clojure
290290
:override t
291-
'((regex_lit marker: _ @font-lock-property-face))
291+
'((regex_lit marker: "#" @font-lock-regex-face))
292+
293+
;; https://github.com/tree-sitter/tree-sitter-regex/blob/master/grammar.js
294+
:feature 'regex
295+
:language 'regex
296+
:override t
297+
'(;; This captures the #"" characters that surround a regex in clojure.
298+
;; If we could define offsets in treesit-range-settings
299+
;; this would not be necessary
300+
((pattern (term
301+
:anchor (pattern_character) @font-lock-regex-face
302+
:anchor (pattern_character) @font-lock-string-face
303+
(pattern_character) @font-lock-string-face :anchor))
304+
(:equal @font-lock-regex-face "#")
305+
(:equal @font-lock-string-face "\""))
306+
;; Capturing Groups
307+
((anonymous_capturing_group (["(" ")"]) @font-lock-regexp-grouping-construct))
308+
((non_capturing_group (["(?:" ")"]) @font-lock-regexp-grouping-construct))
309+
((lookahead_assertion (["(?" "=" "!" ")"]) @font-lock-regexp-grouping-construct))
310+
((named_capturing_group (["(?<" ">" ")"]) @font-lock-regexp-grouping-construct))
311+
((group_name) @font-lock-variable-name-face)
312+
;; Character classes
313+
((character_class (["[" "]"]) @font-lock-bracket-face))
314+
((character_class "^" @font-lock-negation-char-face))
315+
((class_range "-" @font-lock-punctuation-face))
316+
;; Quantifiers
317+
([(zero_or_more) (one_or_more) (optional)]) @font-lock-keyword-face
318+
((count_quantifier (["{" "}"]) @font-lock-bracket-face))
319+
((count_quantifier "," @font-lock-punctuation-face))
320+
((count_quantifier (decimal_digits) @font-lock-number-face))
321+
;;; Escaping
322+
([(start_assertion) (any_character) (end_assertion)]) @font-lock-keyword-face
323+
([(decimal_escape)
324+
(identity_escape)
325+
(character_class_escape)]) @font-lock-regexp-grouping-backslash
326+
((pattern_character) @font-lock-regexp-face)
327+
([(control_escape) (boundary_assertion)] @font-lock-builtin-face))
292328

293329
:feature 'number
294330
:language 'clojure
@@ -597,6 +633,12 @@ See `clojure-ts--standard-definition-node-name' for the implementation used.")
597633
(interactive)
598634
(message "clojure-ts-mode (version %s)" clojure-ts-mode-version))
599635

636+
(defvar clojure-ts--treesit-range-settings
637+
(treesit-range-rules
638+
:embed 'regex
639+
:host 'clojure
640+
'((regex_lit) @capture)))
641+
600642
;;;###autoload
601643
(define-derived-mode clojure-ts-mode prog-mode "Clojure[TS]"
602644
"Major mode for editing Clojure code.
@@ -607,6 +649,9 @@ See `clojure-ts--standard-definition-node-name' for the implementation used.")
607649
(treesit-install-language-grammar 'clojure))
608650
(setq-local comment-start ";")
609651
(when (treesit-ready-p 'clojure)
652+
(when (treesit-ready-p 'regex 'message)
653+
(treesit-parser-create 'regex)
654+
(setq-local treesit-range-settings clojure-ts--treesit-range-settings))
610655
(treesit-parser-create 'clojure)
611656
(setq-local treesit-font-lock-settings (clojure-ts--font-lock-settings)
612657
treesit-defun-prefer-top-level t

test/test.clj

+1-1
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ Etiam commodo nulla id risus convallis pharetra. Integer dapibus, eros vitae veh
264264
(println "Hello, World!"))
265265

266266
(binding [*out* nil]
267-
#"regex string"
267+
#"^(?<lookup>abc)[0-9]\b$"
268268
(def #^Typehint x 1)
269269
(def #^:metadata x 1)
270270
(def ^Typehint x 2)

0 commit comments

Comments
 (0)