|
| 1 | +;;; tree-sitter-langs-build.el --- Building grammar bundle -*- lexical-binding: t; coding: utf-8 -*- |
| 2 | + |
| 3 | +;; Copyright (C) 2020 Tuấn-Anh Nguyễn |
| 4 | +;; |
| 5 | +;; Author: Tuấn-Anh Nguyễn <[email protected]> |
| 6 | + |
| 7 | +;;; Commentary: |
| 8 | + |
| 9 | +;; This file contains functions to obtain and build `tree-sitter' grammars. |
| 10 | + |
| 11 | +;;; Code: |
| 12 | + |
| 13 | +(require 'seq) |
| 14 | +(require 'pp) |
| 15 | +(require 'dired-aux) |
| 16 | +(require 'url) |
| 17 | +(require 'tar-mode) |
| 18 | + |
| 19 | +(eval-when-compile |
| 20 | + (require 'subr-x) |
| 21 | + (require 'pcase) |
| 22 | + (require 'cl-lib)) |
| 23 | + |
| 24 | +(defconst tree-sitter-langs--suffixes '(".dylib" ".dll" ".so") |
| 25 | + "List of suffixes for shared libraries that define tree-sitter languages.") |
| 26 | + |
| 27 | +(defconst tree-sitter-langs--dir |
| 28 | + (file-name-directory (locate-library "tree-sitter-langs"))) |
| 29 | + |
| 30 | +(defconst tree-sitter-langs--bin-dir |
| 31 | + (file-name-as-directory |
| 32 | + (concat tree-sitter-langs--dir "bin"))) |
| 33 | + |
| 34 | +(defconst tree-sitter-langs--version |
| 35 | + (let ((main-file (locate-library "tree-sitter-langs.el"))) |
| 36 | + (unless main-file |
| 37 | + (error "Could not find tree-sitter-langs.el")) |
| 38 | + (with-temp-buffer |
| 39 | + (insert-file-contents-literally main-file) |
| 40 | + (unless (re-search-forward ";; Version: \\(.+\\)") |
| 41 | + (error "Could not determine tree-sitter-langs version")) |
| 42 | + (match-string 1)))) |
| 43 | + |
| 44 | +(defconst tree-sitter-langs--os |
| 45 | + (pcase system-type |
| 46 | + ('darwin "macos") |
| 47 | + ('gnu/linux "linux") |
| 48 | + ('windows-nt "windows") |
| 49 | + (_ (error "Unsupported system-type %s" system-type)))) |
| 50 | + |
| 51 | +(defun tree-sitter-langs--bundle-file (&optional ext version os) |
| 52 | + "Return the grammar bundle file's name, with optional EXT. |
| 53 | +If VERSION and OS are not spcified, use the defaults of |
| 54 | +`tree-sitter-langs--version' and `tree-sitter-langs--os'." |
| 55 | + (format "tree-sitter-grammars-%s-%s.tar%s" |
| 56 | + (or os tree-sitter-langs--os) |
| 57 | + (or version tree-sitter-langs--version) |
| 58 | + (or ext ""))) |
| 59 | + |
| 60 | +(defun tree-sitter-langs--bundle-url (&optional version os) |
| 61 | + "Return the URL to download the grammar bundle. |
| 62 | +If VERSION and OS are not spcified, use the defaults of |
| 63 | +`tree-sitter-langs--version' and `tree-sitter-langs--os'." |
| 64 | + (format "https://dl.bintray.com/ubolonton/emacs/%s" |
| 65 | + (tree-sitter-langs--bundle-file ".gz" version os))) |
| 66 | + |
| 67 | +;;; A list of (LANG-SYMBOL VERSION-TO-BUILD &optional PATHS REPO-URL). |
| 68 | +(defconst tree-sitter-langs-repos |
| 69 | + '((agda "v1.2.1") |
| 70 | + (bash "v0.16.0") |
| 71 | + (c "v0.16.0") |
| 72 | + (c-sharp "v0.16.1") |
| 73 | + (cpp "v0.16.0") |
| 74 | + (css "v0.16.0") |
| 75 | + (fluent "v0.12.0") |
| 76 | + (go "v0.16.0") |
| 77 | + (haskell "v0.13.0") |
| 78 | + (html "v0.16.0") |
| 79 | + (java "v0.16.0") |
| 80 | + (javascript "v0.16.0") |
| 81 | + (jsdoc "v0.16.0") |
| 82 | + (json "v0.16.0") |
| 83 | + (julia "v0.0.3") |
| 84 | + (ocaml "v0.15.0") |
| 85 | + (php "v0.16.1") |
| 86 | + (python "v0.16.0") |
| 87 | + (ruby "v0.16.1") |
| 88 | + (rust "v0.16.0") |
| 89 | + (scala "v0.13.0") |
| 90 | + (swift "a22fa5e") |
| 91 | + (typescript "v0.16.1" ("typescript" "tsx"))) |
| 92 | + "List of language symbols and their corresponding grammar sources.") |
| 93 | + |
| 94 | +(defconst tree-sitter-langs--repos-dir |
| 95 | + (file-name-as-directory |
| 96 | + (concat (file-name-directory (locate-library "tree-sitter-langs")) |
| 97 | + "repos")) |
| 98 | + "Directory to store grammar repos, for compilation.") |
| 99 | + |
| 100 | +(defun tree-sitter-langs--source (lang-symbol) |
| 101 | + "Return a plist describing the source of the grammar for LANG-SYMBOL." |
| 102 | + (when-let ((source (alist-get lang-symbol tree-sitter-langs-repos))) |
| 103 | + (let ((version (or (nth 0 source) "origin/master")) |
| 104 | + (paths (or (nth 1 source) (list ""))) |
| 105 | + (repo (or (nth 2 source) (format "https://github.com/tree-sitter/tree-sitter-%s" (symbol-name lang-symbol))))) |
| 106 | + (list :repo repo :version version :paths paths)))) |
| 107 | + |
| 108 | +(defvar tree-sitter-langs--out nil) |
| 109 | + |
| 110 | +;;; TODO: Use (maybe make) an async library, with a proper event loop, instead |
| 111 | +;;; of busy-waiting. |
| 112 | +(defun tree-sitter-langs--call (program &rest args) |
| 113 | + "Call PROGRAM with ARGS, using BUFFER as stdout+stderr. |
| 114 | +If BUFFER is nil, `princ' is used to forward its stdout+stderr." |
| 115 | + (let* ((command `(,program . ,args)) |
| 116 | + (_ (message "[tree-sitter-langs] Running %s in %s" command default-directory)) |
| 117 | + (base `(:name ,program :command ,command)) |
| 118 | + (output (if tree-sitter-langs--out |
| 119 | + `(:buffer ,tree-sitter-langs--out) |
| 120 | + `(:filter (lambda (proc string) |
| 121 | + (princ string))))) |
| 122 | + (proc (let ((process-environment (cons (format "TREE_SITTER_DIR=%s" tree-sitter-langs--dir) |
| 123 | + process-environment))) |
| 124 | + (apply #'make-process (append base output)))) |
| 125 | + (exit-code (progn |
| 126 | + (while (not (memq (process-status proc) |
| 127 | + '(exit failed signal))) |
| 128 | + (sleep-for 0.1)) |
| 129 | + (process-exit-status proc)))) |
| 130 | + (unless (= exit-code 0) |
| 131 | + (error "Error calling %s, exit code is %s" command exit-code)))) |
| 132 | + |
| 133 | +(defun tree-sitter-langs--get-latest-tags () |
| 134 | + "Return the `tree-sitter-langs-repos' with versions replaced by latest tags. |
| 135 | +If there's no tag, return \"origin/master\"." |
| 136 | + (require 'magit) |
| 137 | + (seq-map |
| 138 | + (lambda (desc) |
| 139 | + (pcase-let* |
| 140 | + ((`(,lang-symbol . _) desc) |
| 141 | + (lang-name (symbol-name lang-symbol)) |
| 142 | + (default-directory (concat tree-sitter-langs--repos-dir |
| 143 | + (format "tree-sitter-%s" lang-name)))) |
| 144 | + `(,lang-symbol ,(or (magit-get-current-tag) |
| 145 | + "origin/master")))) |
| 146 | + tree-sitter-langs-repos)) |
| 147 | + |
| 148 | +(defun tree-sitter-langs--buffer (name) |
| 149 | + "Return a buffer from NAME, as the DESTINATION of `call-process'. |
| 150 | +In batch mode, return stdout." |
| 151 | + (unless noninteractive |
| 152 | + (let ((buf (get-buffer-create name))) |
| 153 | + (pop-to-buffer buf) |
| 154 | + (delete-region (point-min) (point-max)) |
| 155 | + (redisplay) |
| 156 | + buf))) |
| 157 | + |
| 158 | +;;; TODO: Load to check binary compatibility. |
| 159 | +(defun tree-sitter-langs-compile (lang-symbol) |
| 160 | + "Download and compile the grammar for LANG-SYMBOL. |
| 161 | +Requires git and tree-sitter CLI." |
| 162 | + (message "[tree-sitter-langs] Processing %s" lang-symbol) |
| 163 | + (unless (executable-find "git") |
| 164 | + (error "Could not find git (needed to download grammars)")) |
| 165 | + (unless (executable-find "tree-sitter") |
| 166 | + (error "Could not find tree-sitter executable (needed to compile grammars)")) |
| 167 | + (let* ((source (tree-sitter-langs--source lang-symbol)) |
| 168 | + (lang-name (symbol-name lang-symbol)) |
| 169 | + (dir (if source |
| 170 | + (file-name-as-directory |
| 171 | + (concat tree-sitter-langs--repos-dir |
| 172 | + (format "tree-sitter-%s" lang-name))) |
| 173 | + (error "Unknown language `%s'" lang-name))) |
| 174 | + (repo (plist-get source :repo)) |
| 175 | + (paths (plist-get source :paths)) |
| 176 | + (version (plist-get source :version)) |
| 177 | + (tree-sitter-langs--out (tree-sitter-langs--buffer |
| 178 | + (format "*tree-sitter-langs-compile %s*" lang-name)))) |
| 179 | + (if (file-directory-p dir) |
| 180 | + (let ((default-directory dir)) |
| 181 | + (tree-sitter-langs--call "git" "remote" "-v" "update")) |
| 182 | + (tree-sitter-langs--call "git" "clone" "-v" repo dir)) |
| 183 | + (let ((default-directory dir)) |
| 184 | + (tree-sitter-langs--call "git" "reset" "--hard" version) |
| 185 | + ;; TODO: Figure out why we need to skip `npm install' for some repos. |
| 186 | + (ignore-errors |
| 187 | + (tree-sitter-langs--call "npm" "install")) |
| 188 | + ;; A repo can have multiple grammars (e.g. typescript + tsx). |
| 189 | + (dolist (path paths) |
| 190 | + (let ((default-directory (file-name-as-directory (concat dir path)))) |
| 191 | + (tree-sitter-langs--call "tree-sitter" "generate") |
| 192 | + (tree-sitter-langs--call "tree-sitter" "test"))) |
| 193 | + (tree-sitter-langs--call "git" "reset" "--hard" "HEAD") |
| 194 | + (tree-sitter-langs--call "git" "clean" "-f")))) |
| 195 | + |
| 196 | +(defun tree-sitter-langs-create-bundle () |
| 197 | + "Create a bundle of language grammars. |
| 198 | +The bundle includes all languages declared in `tree-sitter-langs-repos'." |
| 199 | + (unless (executable-find "tar") |
| 200 | + (error "Could not find tar executable (needed to bundle compiled grammars)")) |
| 201 | + (let ((errors (thread-last tree-sitter-langs-repos |
| 202 | + (seq-map |
| 203 | + (lambda (desc) |
| 204 | + (pcase-let ((`(,lang-symbol . _) desc)) |
| 205 | + (message "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") |
| 206 | + (condition-case err |
| 207 | + (tree-sitter-langs-compile lang-symbol) |
| 208 | + (error `[,lang-symbol ,err]))))) |
| 209 | + (seq-filter #'identity)))) |
| 210 | + (message "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") |
| 211 | + (unwind-protect |
| 212 | + (let* ((tar-file (concat (file-name-as-directory |
| 213 | + (expand-file-name default-directory)) |
| 214 | + (tree-sitter-langs--bundle-file))) |
| 215 | + (default-directory tree-sitter-langs--bin-dir) |
| 216 | + (tree-sitter-langs--out (tree-sitter-langs--buffer "*tree-sitter-langs-create-bundle*")) |
| 217 | + (files (seq-filter (lambda (file) |
| 218 | + (when (seq-some (lambda (ext) (string-suffix-p ext file)) |
| 219 | + tree-sitter-langs--suffixes) |
| 220 | + file)) |
| 221 | + (directory-files default-directory))) |
| 222 | + ;; Disk names in Windows can confuse tar, so we need this option. BSD |
| 223 | + ;; tar (macOS) doesn't have it, so we don't set it everywhere. |
| 224 | + ;; https://unix.stackexchange.com/questions/13377/tar/13381#13381. |
| 225 | + (tar-opts (pcase system-type |
| 226 | + ('windows-nt '("--force-local"))))) |
| 227 | + (apply #'tree-sitter-langs--call "tar" "-cvf" tar-file (append tar-opts files)) |
| 228 | + (let ((dired-compress-file-suffixes '(("\\.tar\\'" ".tar.gz" nil)))) |
| 229 | + (dired-compress-file tar-file))) |
| 230 | + (when errors |
| 231 | + (message "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") |
| 232 | + (display-warning 'tree-sitter |
| 233 | + (format "Could not compile grammars:\n%s" (pp-to-string errors))))))) |
| 234 | + |
| 235 | +;;;###autoload |
| 236 | +(defun tree-sitter-langs-install (&optional version os keep-bundle) |
| 237 | + "Download and install the specified VERSION of the language grammar bundle. |
| 238 | +If VERSION and OS are not specified, use the defaults of |
| 239 | +`tree-sitter-langs--version' and `tree-sitter-langs--os'. |
| 240 | +
|
| 241 | +The download bundle file is deleted after installation, unless KEEP-BUNDLE is |
| 242 | +non-nil." |
| 243 | + (interactive) |
| 244 | + (let ((dir tree-sitter-langs--bin-dir)) |
| 245 | + (unless (file-directory-p dir) |
| 246 | + (make-directory dir t)) |
| 247 | + (let ((default-directory dir) |
| 248 | + (bundle-file (tree-sitter-langs--bundle-file ".gz" version os))) |
| 249 | + ;; FIX: Handle HTTP errors properly. |
| 250 | + (url-copy-file (tree-sitter-langs--bundle-url version os) |
| 251 | + bundle-file 'ok-if-already-exists) |
| 252 | + (with-temp-buffer |
| 253 | + (insert-file-contents bundle-file) |
| 254 | + (tar-mode) |
| 255 | + (tar-untar-buffer)) |
| 256 | + (unless keep-bundle |
| 257 | + (delete-file bundle-file 'trash)) |
| 258 | + (when (y-or-n-p (format "Show installed grammars in %s? " dir)) |
| 259 | + (with-current-buffer (find-file dir) |
| 260 | + (when (bound-and-true-p dired-omit-mode) |
| 261 | + (dired-omit-mode -1))))))) |
| 262 | + |
| 263 | +;;; This doesn't actually belong here, but for convenience we don't want to put |
| 264 | +;;; this in another `tree-sitter-bootstrap' module. |
| 265 | +(defun tree-sitter-download-dyn-module () |
| 266 | + "Download the pre-compiled `tree-sitter-dyn' module." |
| 267 | + (let* ((main-file (locate-library "tree-sitter.el")) |
| 268 | + (_ (unless main-file |
| 269 | + (error "Could not find tree-sitter.el"))) |
| 270 | + (version (with-temp-buffer |
| 271 | + (insert-file-contents-literally main-file) |
| 272 | + (unless (re-search-forward ";; Version: \\(.+\\)") |
| 273 | + (error "Could not determine tree-sitter version")) |
| 274 | + (match-string 1))) |
| 275 | + (ext (pcase system-type |
| 276 | + ('windows-nt "dll") |
| 277 | + ('darwin "dylib") |
| 278 | + ('gnu/linux "so") |
| 279 | + (_ (error "Unsupported system-type %s" system-type)))) |
| 280 | + (dyn-file (format "tree-sitter-dyn.%s" ext)) |
| 281 | + (gz-file (format "%s.gz" dyn-file)) |
| 282 | + (url (format "https://github.com/ubolonton/emacs-tree-sitter/releases/download/%s/%s" |
| 283 | + version gz-file)) |
| 284 | + (default-directory (file-name-directory main-file))) |
| 285 | + (if (file-exists-p dyn-file) |
| 286 | + (when (y-or-n-p (format "Overwrite %s? " dyn-file)) |
| 287 | + (url-copy-file url gz-file) |
| 288 | + (delete-file dyn-file) |
| 289 | + (dired-compress-file gz-file)) |
| 290 | + (url-copy-file url gz-file) |
| 291 | + (dired-compress-file gz-file)))) |
| 292 | + |
| 293 | +(provide 'tree-sitter-langs-build) |
| 294 | +;;; tree-sitter-langs-build.el ends here |
0 commit comments