Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Syntactic indexing: syntactic codeintel worker scaffolding #59747

Merged
merged 17 commits into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions cmd/syntactic-code-intel-worker/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
load("@rules_oci//oci:defs.bzl", "oci_image", "oci_push", "oci_tarball")
load("@rules_pkg//:pkg.bzl", "pkg_tar")
load("@container_structure_test//:defs.bzl", "container_structure_test")
load("//dev:oci_defs.bzl", "image_repository")

# Library target wrapping the worker's entry point (main.go). It is private to
# this package and is embedded by the go_binary target in this file.
go_library(
name = "syntactic-code-intel-worker_lib",
srcs = ["main.go"],
importpath = "github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker",
visibility = ["//visibility:private"],
deps = [
"//cmd/syntactic-code-intel-worker/shared",
"//internal/sanitycheck",
"//internal/service/svcmain",
],
)

# The worker executable. x_defs sets the version and timestamp variables of
# internal/version from the {STABLE_VERSION} / {VERSION_TIMESTAMP} stamp values.
go_binary(
name = "syntactic-code-intel-worker",
embed = [":syntactic-code-intel-worker_lib"],
visibility = ["//visibility:public"],
x_defs = {
"github.com/sourcegraph/sourcegraph/internal/version.version": "{STABLE_VERSION}",
"github.com/sourcegraph/sourcegraph/internal/version.timestamp": "{VERSION_TIMESTAMP}",
},
)

# Tarball containing the worker binary; used as a layer of the :image target.
# No package_dir is set here; the image entrypoint refers to the binary as
# /syntactic-code-intel-worker.
pkg_tar(
name = "tar_syntactic-code-intel-worker",
srcs = [":syntactic-code-intel-worker"],
)

# Tarball that places the scip-treesitter CLI binary under /usr/local/bin;
# used as a layer of the :image target.
pkg_tar(
name = "tar_scip-treesitter",
srcs = ["//docker-images/syntax-highlighter/crates/scip-treesitter-cli:scip-treesitter"],
package_dir = "/usr/local/bin",
)

# Container image for the worker: the wolfi base image plus one layer for the
# worker binary and one for the scip-treesitter CLI. The worker is started
# through tini and runs as the "sourcegraph" user.
oci_image(
    name = "image",
    base = "@wolfi_base",
    entrypoint = [
        "/sbin/tini",
        "--",
        "/syntactic-code-intel-worker",
    ],
    tars = [
        ":tar_syntactic-code-intel-worker",
        # Same-package labels are written with a leading ":" throughout this file.
        ":tar_scip-treesitter",
    ],
    user = "sourcegraph",
)

# Tarball of :image, tagged syntactic-code-intel-worker:candidate.
oci_tarball(
name = "image_tarball",
image = ":image",
repo_tags = ["syntactic-code-intel-worker:candidate"],
)

# Runs the checks declared in image_test.yaml against :image using the docker
# driver. Tagged "exclusive" and "requires-network".
container_structure_test(
name = "image_test",
timeout = "short",
configs = ["image_test.yaml"],
driver = "docker",
image = ":image",
tags = [
"exclusive",
"requires-network",
],
)

# Pushes :image to the repository returned by
# image_repository("syntactic-code-intel-worker").
oci_push(
name = "candidate_push",
image = ":image",
repository = image_repository("syntactic-code-intel-worker"),
)
1 change: 1 addition & 0 deletions cmd/syntactic-code-intel-worker/CODENOTIFY
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# See https://github.com/sourcegraph/codenotify for documentation.
8 changes: 8 additions & 0 deletions cmd/syntactic-code-intel-worker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Syntactic code intel worker

🚧 WORK IN PROGRESS 🚧

Stateless service that generates SCIP data for codebases using
Tree-sitter, in order to power syntax-based code navigation.

[Design docs](https://docs.google.com/document/d/14MHauv52o4zTFiV6gC6NOJZxcJpglK-ElWa64gqeKDo/edit) (Sourcegraph internal)
21 changes: 21 additions & 0 deletions cmd/syntactic-code-intel-worker/image_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
schemaVersion: "2.0.0"

# Commands executed against the built image.
commandTests:
# NOTE(review): main.go calls sanitycheck.Pass() before starting the service;
# presumably that makes the worker exit early when SANITY_CHECK=true — confirm
# in internal/sanitycheck.
- name: "worker binary is runnable"
command: "/syntactic-code-intel-worker"
envVars:
- key: "SANITY_CHECK"
value: "true"

# NOTE(review): SANITY_CHECK is set here as well; verify that the
# scip-treesitter binary actually honors it.
- name: "scip treesitter binary is runnable"
command: "/usr/local/bin/scip-treesitter"
envVars:
- key: "SANITY_CHECK"
value: "true"

# `id -u` must exit 0 and its output must not start with 0 (the uid of root).
- name: "not running as root"
command: "/usr/bin/id"
args:
- -u
excludedOutput: ["^0"]
exitCode: 0
12 changes: 12 additions & 0 deletions cmd/syntactic-code-intel-worker/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package main

import (
"github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker/shared"
"github.com/sourcegraph/sourcegraph/internal/sanitycheck"
"github.com/sourcegraph/sourcegraph/internal/service/svcmain"
)

// main is the entry point of the syntactic-code-intel-worker binary.
func main() {
// NOTE(review): presumably short-circuits when SANITY_CHECK is set (the image
// tests run this binary with SANITY_CHECK=true) — confirm in internal/sanitycheck.
sanitycheck.Pass()
// Run the worker's service definition as the single service of this process.
svcmain.SingleServiceMain(shared.Service)
}
24 changes: 24 additions & 0 deletions cmd/syntactic-code-intel-worker/shared/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

# Go package with the worker's configuration (config.go), service definition
# (service.go) and Main function (shared.go).
go_library(
name = "shared",
srcs = [
"config.go",
"service.go",
"shared.go",
],
importpath = "github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker/shared",
visibility = ["//visibility:public"],
deps = [
"//internal/codeintel/shared/lsifuploadstore",
"//internal/debugserver",
"//internal/encryption/keyring",
"//internal/env",
"//internal/goroutine",
"//internal/httpserver",
"//internal/observation",
"//internal/service",
"//lib/errors",
"@com_github_sourcegraph_log//:log",
],
)
55 changes: 55 additions & 0 deletions cmd/syntactic-code-intel-worker/shared/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package shared

import (
"net"
"strconv"
"time"

"github.com/sourcegraph/sourcegraph/internal/codeintel/shared/lsifuploadstore"
"github.com/sourcegraph/sourcegraph/internal/env"
"github.com/sourcegraph/sourcegraph/lib/errors"
)

// Config holds the settings of the syntactic code intel worker. All fields are
// populated by Load.
type Config struct {
env.BaseConfig

// WorkerPollInterval is the interval between queries to the repository queue.
WorkerPollInterval time.Duration
// WorkerConcurrency is the maximum number of repositories that can be
// processed concurrently.
WorkerConcurrency int
// WorkerBudget is the amount of compressed input data (in bytes) a worker can
// process concurrently. Zero acts as an infinite budget.
WorkerBudget int64
// MaximumRuntimePerJob is the maximum time a single repository indexing job
// can take.
MaximumRuntimePerJob time.Duration
// SCIPUploadStoreConfig is the upload store configuration, loaded and
// validated through its own Load/Validate methods.
SCIPUploadStoreConfig *lsifuploadstore.Config
// CliPath is the scip-treesitter command (SCIP_TREESITTER_COMMAND).
CliPath string
// ListenAddress is the address the worker's HTTP server listens on.
ListenAddress string
}

// DefaultPort is the port used to build ListenAddress when
// SYNTACTIC_CODE_INTEL_WORKER_ADDR is not set.
const DefaultPort = 3188

// Load populates the configuration from environment variables.
//
// The upload store configuration is loaded first through its own Load method.
// When SYNTACTIC_CODE_INTEL_WORKER_ADDR is unset, ListenAddress falls back to
// DefaultPort, bound to 127.0.0.1 if env.InsecureDev is set and to an empty
// host otherwise.
func (c *Config) Load() {
	c.SCIPUploadStoreConfig = &lsifuploadstore.Config{}
	c.SCIPUploadStoreConfig.Load()

	c.WorkerPollInterval = c.GetInterval("SYNTACTIC_CODE_INTEL_WORKER_POLL_INTERVAL", "1s", "Interval between queries to the repository queue")
	c.WorkerConcurrency = c.GetInt("SYNTACTIC_CODE_INTEL_WORKER_CONCURRENCY", "1", "The maximum number of repositories that can be processed concurrently.")
	c.WorkerBudget = int64(c.GetInt("SYNTACTIC_CODE_INTEL_WORKER_BUDGET", "0", "The amount of compressed input data (in bytes) a worker can process concurrently. Zero acts as an infinite budget."))
	c.MaximumRuntimePerJob = c.GetInterval("SYNTACTIC_CODE_INTEL_WORKER_MAXIMUM_RUNTIME_PER_JOB", "25m", "The maximum time a single repository indexing job can take")

	// The description used to be a "TODO" placeholder; it now says what the
	// variable controls.
	c.CliPath = c.Get("SCIP_TREESITTER_COMMAND", "scip-treesitter", "The path to (or name of) the scip-treesitter CLI binary used by the worker.")

	c.ListenAddress = c.GetOptional("SYNTACTIC_CODE_INTEL_WORKER_ADDR", "The address under which the syntactic codeintel worker API listens. Can include a port.")
	// Fall back to a reasonable default.
	if c.ListenAddress == "" {
		port := strconv.Itoa(DefaultPort)
		host := ""
		if env.InsecureDev {
			host = "127.0.0.1"
		}
		c.ListenAddress = net.JoinHostPort(host, port)
	}
}

// Validate checks the base configuration and then the upload store
// configuration, returning the errors of both combined via errors.Append.
func (c *Config) Validate() error {
	baseErr := c.BaseConfig.Validate()
	combined := errors.Append(nil, baseErr)

	storeErr := c.SCIPUploadStoreConfig.Validate()
	return errors.Append(combined, storeErr)
}
26 changes: 26 additions & 0 deletions cmd/syntactic-code-intel-worker/shared/service.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package shared

import (
"context"

"github.com/sourcegraph/sourcegraph/internal/debugserver"
"github.com/sourcegraph/sourcegraph/internal/env"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/service"
)

// svc is the stateless service definition of the syntactic code intel worker.
type svc struct{}

// Name returns the identifier of this service.
func (svc) Name() string {
	const serviceName = "syntactic-code-intel-worker"
	return serviceName
}

// Configure creates a Config, loads it, and returns it. No debug server
// endpoints are returned.
func (svc) Configure() (env.Config, []debugserver.Endpoint) {
	cfg := &Config{}
	cfg.Load()
	return cfg, nil
}

// Start runs the worker by delegating to Main with a copy of the configuration.
func (svc) Start(ctx context.Context, observationCtx *observation.Context, ready service.ReadyFunc, config env.Config) error {
	// config is expected to be the *Config produced by Configure; the type
	// assertion panics otherwise.
	workerConfig := config.(*Config)
	return Main(ctx, observationCtx, ready, *workerConfig)
}

// Service is the service.Service value of the syntactic code intel worker.
var Service service.Service = svc{}
40 changes: 40 additions & 0 deletions cmd/syntactic-code-intel-worker/shared/shared.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package shared

import (
"context"

"net/http"
"time"

"github.com/sourcegraph/log"
"github.com/sourcegraph/sourcegraph/internal/encryption/keyring"
"github.com/sourcegraph/sourcegraph/internal/goroutine"
"github.com/sourcegraph/sourcegraph/internal/httpserver"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/service"
"github.com/sourcegraph/sourcegraph/lib/errors"
)

// Main is the entry point of the syntactic code intel worker service.
//
// It initializes the keyring, logs the CLI path and listen address from config,
// creates an HTTP server on config.ListenAddress, signals readiness through
// ready, and hands the server to goroutine.MonitorBackgroundRoutines.
//
// It returns an error only when keyring initialization fails.
func Main(ctx context.Context, observationCtx *observation.Context, ready service.ReadyFunc, config Config) error {
	logger := observationCtx.Logger

	if err := keyring.Init(ctx); err != nil {
		return errors.Wrap(err, "initializing keyring")
	}

	logger.Info("Syntactic code intel worker running",
		log.String("path to scip-treesitter CLI", config.CliPath),
		log.String("API address", config.ListenAddress))

	// Initialize health server
	server := httpserver.NewFromAddr(config.ListenAddress, &http.Server{
		ReadTimeout:  75 * time.Second,
		WriteTimeout: 10 * time.Minute,
		Handler:      httpserver.NewHandler(nil),
	})

	// Initialization is complete: tell the service runner we are ready. The
	// callback was previously accepted but never invoked.
	// NOTE(review): assumes ReadyFunc is what drives the readiness reporting of
	// the service runner — confirm against internal/service.
	ready()

	// Go!
	goroutine.MonitorBackgroundRoutines(ctx, server)

	return nil
}
1 change: 1 addition & 0 deletions dev/check/go-dbconn-import.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ allowed_prefix=(
# Transitively depends on updatecheck package which imports but does not use DB
github.com/sourcegraph/sourcegraph/cmd/pings
github.com/sourcegraph/sourcegraph/cmd/precise-code-intel-worker
github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker
github.com/sourcegraph/sourcegraph/cmd/repo-updater
# Transitively depends on zoekt package which imports but does not use DB
github.com/sourcegraph/sourcegraph/cmd/searcher
Expand Down
1 change: 1 addition & 0 deletions dev/linters/dbconn/dbconn.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ var allowedToImport = []string{
// Transitively depends on updatecheck package which imports but does not use DB
"github.com/sourcegraph/sourcegraph/cmd/pings",
"github.com/sourcegraph/sourcegraph/cmd/precise-code-intel-worker",
"github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker",
"github.com/sourcegraph/sourcegraph/cmd/repo-updater",
// Transitively depends on zoekt package which imports but does not use DB
"github.com/sourcegraph/sourcegraph/cmd/searcher",
Expand Down
17 changes: 17 additions & 0 deletions dev/scip-treesitter-dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash

# Wrapper for `scip-treesitter` similar to `dev/scip-ctags-dev`.
#
# To use an alternate scip-treesitter binary for development, invoke
# `SCIP_TREESITTER_COMMAND=path/to/scip-treesitter sg start`.

# Locate the repository root relative to this script and ask the install
# script where the scip-treesitter binary is expected to live.
root="$(dirname "${BASH_SOURCE[0]}")/.."
TARGET=$("$root/dev/scip-treesitter-install.sh" which)

# Without a binary there is nothing to wrap: explain how to get one and fail.
# Diagnostics go to stderr so they never mix with the wrapped command's output.
if [ ! -f "${TARGET}" ]; then
  echo "scip-treesitter is not installed, please run ./dev/scip-treesitter-install.sh" >&2
  echo "Alternatively you can use SCIP_TREESITTER_COMMAND=path/to/scip-treesitter to use your own binary." >&2
  exit 1
fi

# Quote the path so that checkouts located in directories containing spaces work.
"${TARGET}" "$@"
34 changes: 34 additions & 0 deletions dev/scip-treesitter-install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env bash

# Fail fast, and run everything from the repository root.
set -euf -o pipefail
pushd "$(dirname "${BASH_SOURCE[0]}")/.." >/dev/null
mkdir -p .bin

# TODO: add similar task to zoekt alpine

NAME="scip-treesitter"
TARGET="$PWD/.bin/${NAME}"

# `which` sub-command: print the install location and stop without building.
# ${1:-} keeps `set -u` happy when the script is called without arguments.
case "${1:-}" in
  which)
    echo "$TARGET"
    exit 0
    ;;
esac

# Handler for Ctrl-C (SIGINT): report the cancellation and exit non-zero.
ctrl_c() {
  printf "[-] Installation cancelled.\n"
  exit 1
}

trap ctrl_c INT

# Builds the scip-treesitter binary with cargo and copies it to $TARGET.
function build_scip_treesitter {
  cd docker-images/syntax-highlighter/crates/scip-treesitter-cli
  # --release is required: the binary is copied from target/release below,
  # whereas a plain `cargo build` writes its output to target/debug.
  cargo build --release --bin scip-treesitter --target-dir target
  cp ./target/release/scip-treesitter "$TARGET"
}

build_scip_treesitter

popd >/dev/null
2 changes: 1 addition & 1 deletion docker-images/syntax-highlighter/Cargo.Bazel.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ load("@crate_index//:defs.bzl", "aliases", "all_crate_deps")
load("@rules_rust//rust:defs.bzl", "rust_binary", "rust_library", "rust_test")

rust_binary(
name = "scip-treesitter-cli",
srcs = glob(["src/main.rs"]),
name = "scip-treesitter",
srcs = ["src/bin/scip-treesitter.rs"],
aliases = aliases(),
proc_macro_deps = all_crate_deps(
proc_macro = True,
Expand Down Expand Up @@ -71,20 +71,20 @@ rust_test(
],
allow_empty = False,
),
data = [":scip-treesitter-cli"] +
data = [":scip-treesitter"] +
glob(
["tests/snapshots/**"],
allow_empty = False,
),
env = {
"INSTA_WORKSPACE_ROOT": ".",
"RUST_BACKTRACE": "1",
"SCIP_CLI_LOCATION": "$(rootpath :scip-treesitter-cli)",
"SCIP_CLI_LOCATION": "$(rootpath :scip-treesitter)",
},
deps = all_crate_deps(
normal = True,
) + [
":scip-treesitter-cli",
":scip-treesitter",
":scip-treesitter-cli-lib",
] + WORKSPACE_DEPS,
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ name = "scip-treesitter-cli"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[[bin]]
name = "scip-treesitter"

[dependencies]
lazy_static = "1.0"
Expand Down
Loading