diff --git a/cmd/syntactic-code-intel-worker/BUILD.bazel b/cmd/syntactic-code-intel-worker/BUILD.bazel new file mode 100644 index 000000000000..cc50d2886795 --- /dev/null +++ b/cmd/syntactic-code-intel-worker/BUILD.bazel @@ -0,0 +1,77 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library") +load("@rules_oci//oci:defs.bzl", "oci_image", "oci_push", "oci_tarball") +load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("@container_structure_test//:defs.bzl", "container_structure_test") +load("//dev:oci_defs.bzl", "image_repository") + +go_library( + name = "syntactic-code-intel-worker_lib", + srcs = ["main.go"], + importpath = "github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker", + visibility = ["//visibility:private"], + deps = [ + "//cmd/syntactic-code-intel-worker/shared", + "//internal/sanitycheck", + "//internal/service/svcmain", + ], +) + +go_binary( + name = "syntactic-code-intel-worker", + embed = [":syntactic-code-intel-worker_lib"], + visibility = ["//visibility:public"], + x_defs = { + "github.com/sourcegraph/sourcegraph/internal/version.version": "{STABLE_VERSION}", + "github.com/sourcegraph/sourcegraph/internal/version.timestamp": "{VERSION_TIMESTAMP}", + }, +) + +pkg_tar( + name = "tar_syntactic-code-intel-worker", + srcs = [":syntactic-code-intel-worker"], +) + +pkg_tar( + name = "tar_scip-treesitter", + srcs = ["//docker-images/syntax-highlighter/crates/scip-treesitter-cli:scip-treesitter"], + package_dir = "/usr/local/bin", +) + +oci_image( + name = "image", + base = "@wolfi_base", + entrypoint = [ + "/sbin/tini", + "--", + "/syntactic-code-intel-worker", + ], + tars = [ + ":tar_syntactic-code-intel-worker", + "tar_scip-treesitter", + ], + user = "sourcegraph", +) + +oci_tarball( + name = "image_tarball", + image = ":image", + repo_tags = ["syntactic-code-intel-worker:candidate"], +) + +container_structure_test( + name = "image_test", + timeout = "short", + configs = ["image_test.yaml"], + driver = "docker", + image = ":image", + 
tags = [ + "exclusive", + "requires-network", + ], +) + +oci_push( + name = "candidate_push", + image = ":image", + repository = image_repository("syntactic-code-intel-worker"), +) diff --git a/cmd/syntactic-code-intel-worker/CODENOTIFY b/cmd/syntactic-code-intel-worker/CODENOTIFY new file mode 100644 index 000000000000..ff7b5687a368 --- /dev/null +++ b/cmd/syntactic-code-intel-worker/CODENOTIFY @@ -0,0 +1 @@ +# See https://github.com/sourcegraph/codenotify for documentation. diff --git a/cmd/syntactic-code-intel-worker/README.md b/cmd/syntactic-code-intel-worker/README.md new file mode 100644 index 000000000000..6502b93e12ef --- /dev/null +++ b/cmd/syntactic-code-intel-worker/README.md @@ -0,0 +1,8 @@ +# Syntactic code intel worker + +🚧 WORK IN PROGRESS 🚧 + +Stateless service that handles generating SCIP data for codebases +using Tree-sitter for powering syntax-based code navigation. + +[Design docs](https://docs.google.com/document/d/14MHauv52o4zTFiV6gC6NOJZxcJpglK-ElWa64gqeKDo/edit) (Sourcegraph internal) diff --git a/cmd/syntactic-code-intel-worker/image_test.yaml b/cmd/syntactic-code-intel-worker/image_test.yaml new file mode 100644 index 000000000000..16dcf35d1df4 --- /dev/null +++ b/cmd/syntactic-code-intel-worker/image_test.yaml @@ -0,0 +1,21 @@ +schemaVersion: "2.0.0" + +commandTests: + - name: "worker binary is runnable" + command: "/syntactic-code-intel-worker" + envVars: + - key: "SANITY_CHECK" + value: "true" + + - name: "scip treesitter binary is runnable" + command: "/usr/local/bin/scip-treesitter" + envVars: + - key: "SANITY_CHECK" + value: "true" + + - name: "not running as root" + command: "/usr/bin/id" + args: + - -u + excludedOutput: ["^0"] + exitCode: 0 diff --git a/cmd/syntactic-code-intel-worker/main.go b/cmd/syntactic-code-intel-worker/main.go new file mode 100644 index 000000000000..38ac3340af37 --- /dev/null +++ b/cmd/syntactic-code-intel-worker/main.go @@ -0,0 +1,12 @@ +package main + +import ( + 
"github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker/shared" + "github.com/sourcegraph/sourcegraph/internal/sanitycheck" + "github.com/sourcegraph/sourcegraph/internal/service/svcmain" +) + +func main() { + sanitycheck.Pass() + svcmain.SingleServiceMain(shared.Service) +} diff --git a/cmd/syntactic-code-intel-worker/shared/BUILD.bazel b/cmd/syntactic-code-intel-worker/shared/BUILD.bazel new file mode 100644 index 000000000000..24e2f2d4aa43 --- /dev/null +++ b/cmd/syntactic-code-intel-worker/shared/BUILD.bazel @@ -0,0 +1,24 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "shared", + srcs = [ + "config.go", + "service.go", + "shared.go", + ], + importpath = "github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker/shared", + visibility = ["//visibility:public"], + deps = [ + "//internal/codeintel/shared/lsifuploadstore", + "//internal/debugserver", + "//internal/encryption/keyring", + "//internal/env", + "//internal/goroutine", + "//internal/httpserver", + "//internal/observation", + "//internal/service", + "//lib/errors", + "@com_github_sourcegraph_log//:log", + ], +) diff --git a/cmd/syntactic-code-intel-worker/shared/config.go b/cmd/syntactic-code-intel-worker/shared/config.go new file mode 100644 index 000000000000..5e87f279b851 --- /dev/null +++ b/cmd/syntactic-code-intel-worker/shared/config.go @@ -0,0 +1,60 @@ +package shared + +import ( + "net" + "strconv" + "time" + + "github.com/sourcegraph/sourcegraph/internal/codeintel/shared/lsifuploadstore" + "github.com/sourcegraph/sourcegraph/internal/env" + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +// Config holds the settings of the syntactic code intel worker, populated from environment variables by Load. +type Config struct { + env.BaseConfig + + WorkerPollInterval time.Duration + WorkerConcurrency int + WorkerBudget int64 + MaximumRuntimePerJob time.Duration + SCIPUploadStoreConfig *lsifuploadstore.Config + CliPath string + ListenAddress string +} + +// DefaultPort is the port used for ListenAddress when SYNTACTIC_CODE_INTEL_WORKER_ADDR is not set. +const DefaultPort = 3188 + +// Load reads every setting from the environment, falling back to the defaults given below. +func (c *Config) Load() { + c.SCIPUploadStoreConfig = 
&lsifuploadstore.Config{} + c.SCIPUploadStoreConfig.Load() + + c.WorkerPollInterval = c.GetInterval("SYNTACTIC_CODE_INTEL_WORKER_POLL_INTERVAL", "1s", "Interval between queries to the repository queue") + c.WorkerConcurrency = c.GetInt("SYNTACTIC_CODE_INTEL_WORKER_CONCURRENCY", "1", "The maximum number of repositories that can be processed concurrently.") + c.WorkerBudget = int64(c.GetInt("SYNTACTIC_CODE_INTEL_WORKER_BUDGET", "0", "The amount of compressed input data (in bytes) a worker can process concurrently. Zero acts as an infinite budget.")) + c.MaximumRuntimePerJob = c.GetInterval("SYNTACTIC_CODE_INTEL_WORKER_MAXIMUM_RUNTIME_PER_JOB", "25m", "The maximum time a single repository indexing job can take") + + c.CliPath = c.Get("SCIP_TREESITTER_COMMAND", "scip-treesitter", "The path to (or name of) the scip-treesitter CLI binary.") + + c.ListenAddress = c.GetOptional("SYNTACTIC_CODE_INTEL_WORKER_ADDR", "The address under which the syntactic codeintel worker API listens. Can include a port.") + // Fall back to a reasonable default. 
+ if c.ListenAddress == "" { + port := strconv.Itoa(DefaultPort) + // An empty host yields ":<port>"; the host is pinned to loopback only in insecure dev mode. + host := "" + if env.InsecureDev { + host = "127.0.0.1" + } + c.ListenAddress = net.JoinHostPort(host, port) + } +} + +// Validate returns the accumulated validation errors of the base config and the SCIP upload store config. +func (c *Config) Validate() error { + var errs error + errs = errors.Append(errs, c.BaseConfig.Validate()) + errs = errors.Append(errs, c.SCIPUploadStoreConfig.Validate()) + return errs +} diff --git a/cmd/syntactic-code-intel-worker/shared/service.go b/cmd/syntactic-code-intel-worker/shared/service.go new file mode 100644 index 000000000000..7a0b20a8e7f0 --- /dev/null +++ b/cmd/syntactic-code-intel-worker/shared/service.go @@ -0,0 +1,31 @@ +package shared + +import ( + "context" + + "github.com/sourcegraph/sourcegraph/internal/debugserver" + "github.com/sourcegraph/sourcegraph/internal/env" + "github.com/sourcegraph/sourcegraph/internal/observation" + "github.com/sourcegraph/sourcegraph/internal/service" +) + +// svc is the service.Service implementation for the syntactic code intel worker. +type svc struct{} + +// Name returns the service name. +func (svc) Name() string { return "syntactic-code-intel-worker" } + +// Configure loads the Config from the environment; it registers no debug server endpoints. +func (svc) Configure() (env.Config, []debugserver.Endpoint) { + var config Config + config.Load() + return &config, nil +} + +// Start runs Main with the given config, which must be a *Config. +func (svc) Start(ctx context.Context, observationCtx *observation.Context, ready service.ReadyFunc, config env.Config) error { + return Main(ctx, observationCtx, ready, *config.(*Config)) +} + +// Service is the entry point handed to svcmain by the worker binary. +var Service service.Service = svc{} diff --git a/cmd/syntactic-code-intel-worker/shared/shared.go b/cmd/syntactic-code-intel-worker/shared/shared.go new file mode 100644 index 000000000000..c84e2b247c43 --- /dev/null +++ b/cmd/syntactic-code-intel-worker/shared/shared.go @@ -0,0 +1,40 @@ +package shared + +import ( + "context" + + "net/http" + "time" + + "github.com/sourcegraph/log" + "github.com/sourcegraph/sourcegraph/internal/encryption/keyring" + "github.com/sourcegraph/sourcegraph/internal/goroutine" + "github.com/sourcegraph/sourcegraph/internal/httpserver" + "github.com/sourcegraph/sourcegraph/internal/observation" + 
"github.com/sourcegraph/sourcegraph/internal/service" + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +// Main initializes the keyring, starts the health server on config.ListenAddress and marks the service as ready. +func Main(ctx context.Context, observationCtx *observation.Context, ready service.ReadyFunc, config Config) error { + if err := keyring.Init(ctx); err != nil { + return errors.Wrap(err, "initializing keyring") + } + + observationCtx.Logger.Info("Syntactic code intel worker running", + log.String("path to scip-treesitter CLI", config.CliPath), + log.String("API address", config.ListenAddress)) + + // Initialize health server + server := httpserver.NewFromAddr(config.ListenAddress, &http.Server{ + ReadTimeout: 75 * time.Second, + WriteTimeout: 10 * time.Minute, + Handler: httpserver.NewHandler(nil), + }) + + // Mark the service as ready and go! + ready() + goroutine.MonitorBackgroundRoutines(ctx, server) + + return nil +} diff --git a/dev/check/go-dbconn-import.sh b/dev/check/go-dbconn-import.sh index 32f9486af1bc..ae5fcb3273ac 100755 --- a/dev/check/go-dbconn-import.sh +++ b/dev/check/go-dbconn-import.sh @@ -18,6 +18,7 @@ allowed_prefix=( # Transitively depends on updatecheck package which imports but does not use DB github.com/sourcegraph/sourcegraph/cmd/pings github.com/sourcegraph/sourcegraph/cmd/precise-code-intel-worker + github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker github.com/sourcegraph/sourcegraph/cmd/repo-updater # Transitively depends on zoekt package which imports but does not use DB github.com/sourcegraph/sourcegraph/cmd/searcher diff --git a/dev/linters/dbconn/dbconn.go b/dev/linters/dbconn/dbconn.go index f12557ab8ef6..8ffe7345480c 100644 --- a/dev/linters/dbconn/dbconn.go +++ b/dev/linters/dbconn/dbconn.go @@ -31,6 +31,7 @@ var allowedToImport = []string{ // Transitively depends on updatecheck package which imports but does not use DB "github.com/sourcegraph/sourcegraph/cmd/pings", "github.com/sourcegraph/sourcegraph/cmd/precise-code-intel-worker", + "github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker", 
"github.com/sourcegraph/sourcegraph/cmd/repo-updater", // Transitively depends on zoekt package which imports but does not use DB "github.com/sourcegraph/sourcegraph/cmd/searcher", diff --git a/dev/scip-treesitter-dev b/dev/scip-treesitter-dev new file mode 100755 index 000000000000..58e2aca1ceee --- /dev/null +++ b/dev/scip-treesitter-dev @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# Wrapper for `scip-treesitter` similar to `dev/scip-ctags-dev`. +# +# To use an alternate scip-treesitter binary for development, invoke +# `SCIP_TREESITTER_COMMAND=path/to/scip-treesitter sg start`. + +root="$(dirname "${BASH_SOURCE[0]}")/.." +TARGET=$("$root/dev/scip-treesitter-install.sh" which) + +if [ ! -f "${TARGET}" ]; then + echo "scip-treesitter is not installed, please run ./dev/scip-treesitter-install.sh" >&2 + echo "Alternatively you can use SCIP_TREESITTER_COMMAND=path/to/scip-treesitter to use your own binary." >&2 + exit 1 +else + exec "${TARGET}" "$@" +fi diff --git a/dev/scip-treesitter-install.sh b/dev/scip-treesitter-install.sh new file mode 100755 index 000000000000..83733ea8ba54 --- /dev/null +++ b/dev/scip-treesitter-install.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +set -euf -o pipefail +pushd "$(dirname "${BASH_SOURCE[0]}")/.." 
>/dev/null +mkdir -p .bin + +# TODO: add similar task to zoekt alpine + +NAME="scip-treesitter" +TARGET="$PWD/.bin/${NAME}" + +if [ $# -ne 0 ]; then + if [ "$1" == "which" ]; then + echo "$TARGET" + exit 0 + fi +fi + +function ctrl_c() { + printf "[-] Installation cancelled.\n" + exit 1 +} + +trap ctrl_c INT + +function build_scip_treesitter { + cd docker-images/syntax-highlighter/crates/scip-treesitter-cli + cargo build --release --bin scip-treesitter --target-dir target # release profile: the cp below reads from target/release + cp ./target/release/scip-treesitter "$TARGET" +} + +build_scip_treesitter + +popd >/dev/null diff --git a/docker-images/syntax-highlighter/Cargo.Bazel.lock b/docker-images/syntax-highlighter/Cargo.Bazel.lock index 9b327f2412db..a8857912bd62 100644 --- a/docker-images/syntax-highlighter/Cargo.Bazel.lock +++ b/docker-images/syntax-highlighter/Cargo.Bazel.lock @@ -1,5 +1,5 @@ { - "checksum": "d9e91f35b8090c7c539c13c008d84bfffcc8e9ba4989fa33e0fa11e47136d117", + "checksum": "2aace7295e52e3affbef65c6d2975a2348c691c9ee9758235daa1104fcda8c37", "crates": { "addr2line 0.20.0": { "name": "addr2line", diff --git a/docker-images/syntax-highlighter/crates/scip-treesitter-cli/BUILD.bazel b/docker-images/syntax-highlighter/crates/scip-treesitter-cli/BUILD.bazel index 2ed763229b2d..c186f04e0c30 100644 --- a/docker-images/syntax-highlighter/crates/scip-treesitter-cli/BUILD.bazel +++ b/docker-images/syntax-highlighter/crates/scip-treesitter-cli/BUILD.bazel @@ -2,8 +2,8 @@ load("@crate_index//:defs.bzl", "aliases", "all_crate_deps") load("@rules_rust//rust:defs.bzl", "rust_binary", "rust_library", "rust_test") rust_binary( - name = "scip-treesitter-cli", - srcs = glob(["src/main.rs"]), + name = "scip-treesitter", + srcs = ["src/bin/scip-treesitter.rs"], aliases = aliases(), proc_macro_deps = all_crate_deps( proc_macro = True, @@ -71,7 +71,7 @@ rust_test( ], allow_empty = False, ), - data = [":scip-treesitter-cli"] + + data = [":scip-treesitter"] + glob( ["tests/snapshots/**"], allow_empty = False, @@ -79,12 +79,12 @@ 
rust_test( env = { "INSTA_WORKSPACE_ROOT": ".", "RUST_BACKTRACE": "1", - "SCIP_CLI_LOCATION": "$(rootpath :scip-treesitter-cli)", + "SCIP_CLI_LOCATION": "$(rootpath :scip-treesitter)", }, deps = all_crate_deps( normal = True, ) + [ - ":scip-treesitter-cli", + ":scip-treesitter", ":scip-treesitter-cli-lib", ] + WORKSPACE_DEPS, ) diff --git a/docker-images/syntax-highlighter/crates/scip-treesitter-cli/Cargo.toml b/docker-images/syntax-highlighter/crates/scip-treesitter-cli/Cargo.toml index 9bf2848bea94..cf453983b8b4 100644 --- a/docker-images/syntax-highlighter/crates/scip-treesitter-cli/Cargo.toml +++ b/docker-images/syntax-highlighter/crates/scip-treesitter-cli/Cargo.toml @@ -3,7 +3,8 @@ name = "scip-treesitter-cli" version = "0.1.0" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[[bin]] +name = "scip-treesitter" [dependencies] lazy_static = "1.0" diff --git a/docker-images/syntax-highlighter/crates/scip-treesitter-cli/src/main.rs b/docker-images/syntax-highlighter/crates/scip-treesitter-cli/src/bin/scip-treesitter.rs similarity index 89% rename from docker-images/syntax-highlighter/crates/scip-treesitter-cli/src/main.rs rename to docker-images/syntax-highlighter/crates/scip-treesitter-cli/src/bin/scip-treesitter.rs index 493c7c80099a..24c3a66c5339 100644 --- a/docker-images/syntax-highlighter/crates/scip-treesitter-cli/src/main.rs +++ b/docker-images/syntax-highlighter/crates/scip-treesitter-cli/src/bin/scip-treesitter.rs @@ -81,6 +81,19 @@ enum Commands { } pub fn main() { + // Exits with a code zero if the environment variable SANITY_CHECK equals + // to "true". This enables testing that the current program is in a runnable + // state against the platform it's being executed on. 
+ // + // See https://github.com/GoogleContainerTools/container-structure-test + match std::env::var("SANITY_CHECK") { + Ok(v) if v == "true" => { + println!("Sanity check passed, exiting without error"); + std::process::exit(0) + } + _ => {} + }; + let cli = Cli::parse(); match cli.command { diff --git a/sg.config.yaml b/sg.config.yaml index 62dba78f88ed..abaeb2393add 100644 --- a/sg.config.yaml +++ b/sg.config.yaml @@ -59,6 +59,8 @@ env: { "Name": "embeddings", "Host": "127.0.0.1:6099" }, { "Name": "zoekt-index-0", "Host": "127.0.0.1:6072" }, { "Name": "zoekt-index-1", "Host": "127.0.0.1:6073" }, + { "Name": "syntactic-code-intel-worker-0", "Host": "127.0.0.1:6075" }, + { "Name": "syntactic-code-intel-worker-1", "Host": "127.0.0.1:6076" }, { "Name": "zoekt-web-0", "Host": "127.0.0.1:3070", "DefaultPath": "/debug/requests/" }, { "Name": "zoekt-web-1", "Host": "127.0.0.1:3071", "DefaultPath": "/debug/requests/" } ] @@ -182,7 +184,7 @@ commands: fi go build -gcflags="$GCFLAGS" -o .bin/gitserver github.com/sourcegraph/sourcegraph/cmd/gitserver checkBinary: .bin/gitserver - env: &gitserverenv + env: HOSTNAME: 127.0.0.1:3178 watch: - lib @@ -197,7 +199,6 @@ commands: gitserver-0: <<: *gitserver_template env: - <<: *gitserverenv GITSERVER_EXTERNAL_ADDR: 127.0.0.1:3501 GITSERVER_ADDR: 127.0.0.1:3501 SRC_REPOS_DIR: $HOME/.sourcegraph/repos_1 @@ -206,7 +207,6 @@ commands: gitserver-1: <<: *gitserver_template env: - <<: *gitserverenv GITSERVER_EXTERNAL_ADDR: 127.0.0.1:3502 GITSERVER_ADDR: 127.0.0.1:3502 SRC_REPOS_DIR: $HOME/.sourcegraph/repos_2 @@ -280,6 +280,7 @@ commands: -e QDRANT_INIT_FILE_PATH=/data/.qdrant-initialized \ --entrypoint /usr/local/bin/qdrant \ sourcegraph/qdrant:insiders + worker: cmd: | export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem) @@ -563,6 +564,46 @@ commands: - cmd/precise-code-intel-worker - lib/codeintel + syntactic-codeintel-worker-template: &syntactic_codeintel_worker_template + cmd: 
| + export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem) + .bin/syntactic-code-intel-worker + install: | + if [ -n "$DELVE" ]; then + export GCFLAGS='all=-N -l' + fi + + if [ ! -f "$(./dev/scip-treesitter-install.sh which)" ]; then + echo "Building scip-treesitter" + ./dev/scip-treesitter-install.sh + fi + + echo "Building syntactic-code-intel-worker" + go build -gcflags="$GCFLAGS" -o .bin/syntactic-code-intel-worker github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker + checkBinary: .bin/syntactic-code-intel-worker + watch: + - lib + - internal + - cmd/syntactic-code-intel-worker + - lib/codeintel + env: &syntactic_codeintel_worker_env + SCIP_TREESITTER_COMMAND: dev/scip-treesitter-dev + + syntactic-code-intel-worker-0: + <<: *syntactic_codeintel_worker_template + env: + <<: *syntactic_codeintel_worker_env + SYNTACTIC_CODE_INTEL_WORKER_ADDR: 127.0.0.1:6075 + + syntactic-code-intel-worker-1: + <<: *syntactic_codeintel_worker_template + cmd: | + export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem) + .bin/syntactic-code-intel-worker + env: + <<: *syntactic_codeintel_worker_env + SYNTACTIC_CODE_INTEL_WORKER_ADDR: 127.0.0.1:6076 + executor-template: &executor_template # TMPDIR is set here so it's not set in the `install` process, which would trip up `go build`. cmd: | @@ -1039,6 +1080,7 @@ bazelCommands: GITSERVER_ADDR: 127.0.0.1:3502 SRC_REPOS_DIR: $HOME/.sourcegraph/repos_2 SRC_PROF_HTTP: 127.0.0.1:3552 + codeintel-worker: precmd: | export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem) @@ -1195,6 +1237,21 @@ commandsets: - grafana - prometheus + codeintel-syntactic: + requiresDevPrivate: true + checks: + - docker + - redis + - postgres + - git + commands: + - frontend + - worker + - blobstore + - syntactic-code-intel-worker-0 + - syntactic-code-intel-worker-1 + + codeintel: requiresDevPrivate: true checks: