Skip to content

Commit 58b0eb1

Browse files
author
Alvaro Muñoz
committed
Initial commit
1 parent ebe7e7f commit 58b0eb1

File tree

11,125 files changed

+1170271
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

11,125 files changed

+1170271
-0
lines changed

Diff for: Makefile

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Note: This is meant for codeql_kernel developer use only
.PHONY: data-files build install clean test

# Stage the kernel spec (kernel.json + logo images) under jupyter-data/.
data-files: clean
	mkdir -p jupyter-data/share/jupyter/kernels/codeql
	cp codeql_kernel/kernel.json jupyter-data/share/jupyter/kernels/codeql
	cp codeql_kernel/images/* jupyter-data/share/jupyter/kernels/codeql/

# Build the tree-sitter grammar library, then install the package in editable mode.
install: data-files
	python build_treesitter.py
	pip install -e ".[test]"

# Remove every generated directory.
clean:
	rm -rf jupyter-data
	rm -rf build
	rm -rf dist

# Build the distributions and validate their metadata.
build: data-files
	pip install build twine
	python -m build .
	twine check --strict dist/*

# Run the test suite from a clean tree and clean up afterwards.
# $(MAKE) (rather than a literal `make`) re-invokes the same make binary and
# propagates command-line flags such as -n, -j and -k to the sub-make.
test: clean
	pytest
	$(MAKE) clean

Diff for: README.md

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# CodeQL kernel for Jupyter
2+
3+
## Local Installation
4+
5+
To install from a git checkout, run:
6+
7+
```bash
8+
$ pip install jupyter jupyterlab tree_sitter
9+
$ make install
10+
```
11+
12+
Verify installation:
13+
14+
Run `jupyter kernelspec list` and check that `codeql` is available as a Jupyter kernel:
15+
16+
```bash
17+
$ jupyter kernelspec list
18+
Available kernels:
19+
codeql /Users/pwntester/.pyenv/versions/3.8.13/share/jupyter/kernels/codeql
20+
python3 /Users/pwntester/.pyenv/versions/3.8.13/share/jupyter/kernels/python3
21+
```
22+
23+
# Highlight extension
24+
25+
JupyterLab extension that highlights CodeQL syntax.
26+
27+
## Local Installation
28+
29+
```bash
30+
cd jupyterlab-codeql-highlight
31+
npm install
32+
jupyter labextension link .
33+
```
34+
35+
# Commands
36+
37+
- `%set_database <CodeQL DB directory>`: Sets the CodeQL database to analyze.
38+
39+
# Example
40+
41+
```bash
42+
cd example
43+
jupyter-lab test.ipynb
44+
```
45+
46+
Wait for each cell to finish running before running the next one.
47+
48+
# Disclaimer
49+
50+
This is an experimental project not maintained by the GitHub CodeQL teams.
51+
Contributions are welcome!

Diff for: build_treesitter.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from tree_sitter import Language
2+
3+
# Build the tree-sitter grammar found in vendor/tree-sitter-ql into the shared
# library codeql_kernel/tree-sitter-ql.so. Both paths are relative, so this
# script is expected to be run from the repository root (as `make install` does).
Language.build_library('codeql_kernel/tree-sitter-ql.so', ['vendor/tree-sitter-ql'])

Diff for: codeql_kernel/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .kernel import CodeQLKernel
2+
from .codeql import CLIClient, QueryClient
3+
from ._version import __version__

Diff for: codeql_kernel/__main__.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from ipykernel.kernelapp import IPKernelApp
2+
from . import CodeQLKernel
3+
4+
# Only start the kernel when this module is executed (`python -m codeql_kernel`),
# not when it is merely imported by tooling.
if __name__ == "__main__":
    IPKernelApp.launch_instance(kernel_class=CodeQLKernel)

Diff for: codeql_kernel/__pycache__/__init__.cpython-38.pyc

326 Bytes
Binary file not shown.

Diff for: codeql_kernel/__pycache__/__main__.cpython-38.pyc

321 Bytes
Binary file not shown.

Diff for: codeql_kernel/__pycache__/_version.cpython-38.pyc

198 Bytes
Binary file not shown.

Diff for: codeql_kernel/__pycache__/codeql.cpython-38.pyc

6.26 KB
Binary file not shown.

Diff for: codeql_kernel/__pycache__/jsonrpc.cpython-38.pyc

17.4 KB
Binary file not shown.

Diff for: codeql_kernel/__pycache__/kernel.cpython-38.pyc

5.71 KB
Binary file not shown.

Diff for: codeql_kernel/__pycache__/rawrpc.cpython-38.pyc

3.53 KB
Binary file not shown.

Diff for: codeql_kernel/_version.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__version__ = '0.0.1'

Diff for: codeql_kernel/codeql.py

+220
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
import logging
2+
import os
3+
import tempfile
4+
import time
5+
from subprocess import PIPE, Popen
6+
from typing import Optional, Tuple
7+
8+
from .jsonrpc import RPC as JSONRPC
9+
from .rawrpc import RPC as RawRPC
10+
11+
12+
class CLIClient:
    """
    Client for a `codeql execute cli-server` process.

    Commands are sent through the project's raw RPC transport (``RawRPC``).
    Every command helper returns an ``(error, result)`` tuple in which
    ``error`` is falsy on success.
    """

    def __init__(self, logdir="/tmp/codeql_kernel_cliserver"):
        """
        Start the CLI server.

        :param logdir: directory passed to the server's ``--logdir`` option.
        """
        # Memoized JVM options returned by `codeql resolve ram`.
        self.cache = {"ram": []}
        self.conn = RawRPC(
            [
                "codeql",
                "execute",
                "cli-server",
                "--logdir",
                logdir,
            ]
        )

    def stop(self):
        """Shut down the underlying RPC connection."""
        self.conn.stop()

    def resolve_ram(self) -> Tuple[Optional[str], Optional[list]]:
        """
        Return the ``-J...`` options reported by `codeql resolve ram`.

        The answer is cached after the first successful, non-empty lookup.
        """
        cached = self.cache.get("ram")
        if cached:
            return (None, cached)

        (err, result) = self.conn.request(["resolve", "ram", "--format=json"])
        if err:
            return (err, None)
        # Guard against a missing payload; only the JVM (-J) flags are kept.
        self.cache["ram"] = [opt for opt in (result or []) if opt.startswith("-J")]
        return (None, self.cache["ram"])

    def resolve_metadata(self, query) -> Tuple[Optional[str], dict]:
        """Run `codeql resolve metadata` for the given query file."""
        cmd = ["resolve", "metadata", "--format=json", query]
        return self.conn.request(cmd)

    def resolve_database(self, db_path) -> Tuple[Optional[str], dict]:
        """Run `codeql resolve database` for the given database directory."""
        cmd = ["resolve", "database", "--format=json", db_path]
        return self.conn.request(cmd)

    def resolve_library_path(self, query) -> Tuple[Optional[str], Optional[dict]]:
        """Run `codeql resolve library-path` for the given query file."""
        cmd = ["resolve", "library-path", "--format=json", "--query", query]
        return self.conn.request(cmd)

    def bqrs_info(self, bqrs_path) -> Tuple[Optional[str], dict]:
        """Run `codeql bqrs info` on the given BQRS file."""
        cmd = ["bqrs", "info", "--format=json", bqrs_path]
        return self.conn.request(cmd)

    def bqrs_decode(self, bqrs_path) -> Tuple[Optional[str], Optional[str]]:
        """
        Decode a BQRS file to CSV and return ``(error, csv_text)``.

        The CSV is written by the CLI to a temporary file, which is read back
        and then always removed.
        """
        (err, ram_opts) = self.resolve_ram()
        if err or not ram_opts:
            return (f"Error resolving ram options {err}", None)

        # Reserve a unique output path and release our own handle immediately
        # so the CLI process is free to (re)write the file.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            results_path = tmp.name

        try:
            cmd = [
                "bqrs",
                "decode",
                "--format=csv",
                f"-o={results_path}",
                "--entities=string,url",
                bqrs_path,
            ]
            cmd.extend(ram_opts)
            (err, _) = self.conn.request(cmd)
            if err:
                return (f"Error decoding bqrs file {err}", None)
            if not os.path.exists(results_path):
                return ("Error decoding results", None)
            with open(results_path, "r") as f:
                return (None, f.read())
        finally:
            # Best-effort cleanup: the caller only ever receives the text.
            try:
                os.remove(results_path)
            except OSError:
                pass
83+
84+
85+
class QueryClient:
    """
    Client for a `codeql execute query-server2` subprocess.

    Requests are sent over the project's JSON-RPC transport (``JSONRPC``)
    wired to the subprocess' stdin/stdout. A ``CLIClient`` is owned by this
    object and used for database resolution and BQRS inspection/decoding.
    """

    def __init__(self, on_progress=None, on_result=None):
        """
        Spawn the query server.

        :param on_progress: optional handler registered for the
            ``ql/progressUpdated`` notification.
        :param on_result: accepted for interface compatibility; not used here.
        :raises RuntimeError: if the RAM options cannot be resolved.
        """
        self._cli_client: CLIClient = CLIClient()
        cmd = ["codeql", "execute", "query-server2", "--threads=0", "--evaluator-log-level", "5"]
        # debug
        # cmd.extend(["--debug", "--tuple-counting", "-v", "--log-to-stderr"])
        # --save-cache --max-disk-cache XX
        (err, ram_opts) = self._cli_client.resolve_ram()
        if err or not ram_opts:
            # __init__ must not return a value (doing so raises an unrelated
            # TypeError), so report the failure with an exception and do not
            # leave the helper CLI server running.
            self._cli_client.stop()
            raise RuntimeError(f"Error resolving ram options {err}")
        cmd.extend(ram_opts)

        self._progress_id = 0
        self._evaluate_id = 0
        self._db_metadata = {}

        # NOTE(review): stderr is piped but never read in this class; confirm
        # that the server cannot block on a full stderr pipe.
        self._proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        handlers = {}
        if on_progress:
            handlers["ql/progressUpdated"] = on_progress
        self._conn = JSONRPC(
            handlers=handlers, stdout=self._proc.stdin, stdin=self._proc.stdout
        )
        # TODO: wait for query server to be ready
        time.sleep(2)

    def stop(self):
        """Close the subprocess pipes, terminate it, and stop the CLI client."""
        for pipe in (self._proc.stdin, self._proc.stdout, self._proc.stderr):
            if pipe:
                pipe.close()
        self._proc.terminate()
        self._proc.wait()
        if self._cli_client:
            self._cli_client.stop()

    def next_progress_id(self) -> int:
        """Return the next progress id (1, 2, 3, ...)."""
        self._progress_id += 1
        return self._progress_id

    def next_evaluate_id(self) -> int:
        """Return the next evaluation id (1, 2, 3, ...)."""
        self._evaluate_id += 1
        return self._evaluate_id

    def register_database(self, db_path) -> Optional[str]:
        """
        Register a database with the query server

        :return: ``None`` on success, otherwise an error message.
        """
        if not db_path.endswith("/"):
            db_path = db_path + "/"
        if not os.path.isdir(db_path):
            return f"Database path {db_path} is not a directory"

        (err, db_metadata) = self._cli_client.resolve_database(db_path)
        if err:
            return "Failed to resolve database metadata"

        # TODO: implement on-the-fly query patching

        params = {
            "body": {
                "databases": [db_path],
                "progressId": self.next_progress_id(),
            }
        }
        (err, _) = self._conn.request("evaluation/registerDatabases", args=params)
        if err:
            return err

        # Copy so an empty/None payload cannot break the "path" assignment.
        self._db_metadata = dict(db_metadata or {})
        self._db_metadata["path"] = db_path
        return None

    @staticmethod
    def _build_target(query_path, quick_eval):
        """Build the ``target`` request field: whole query or quick-eval range."""
        if not quick_eval:
            return {"query": {"xx": ""}}
        return {
            "quickEval": {
                "quickEvalPos": {
                    "fileName": query_path,
                    "line": quick_eval.get("startLine"),
                    "column": quick_eval.get("startColumn"),
                    "endLine": quick_eval.get("endLine"),
                    "endColumn": quick_eval.get("endColumn"),
                }
            }
        }

    @staticmethod
    def _row_count(result_sets):
        """Row count of the ``#select`` result set, else of the first set."""
        select_rows = [rs["rows"] for rs in result_sets if rs["name"] == "#select"]
        return select_rows[-1] if select_rows else result_sets[0]["rows"]

    def run_query(
        self, query_path, quick_eval=None, additional_packs=None
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Evaluate a query against the registered database.

        :param query_path: path of the query file to run.
        :param quick_eval: optional dict with ``startLine``, ``startColumn``,
            ``endLine`` and ``endColumn``; when given, only that range is
            evaluated.
        :param additional_packs: optional list of pack directories; defaults
            to the path that was previously hard-coded.
        :return: ``(error, csv_text)``; ``csv_text`` is ``"No results"`` when
            the query produced no rows.
        """
        import shutil  # local import: only needed for scratch-directory cleanup

        logging.info(f"Running query {query_path}")

        db_path = (self._db_metadata or {}).get("path")
        if not db_path:
            return ("No database registered", None)

        if additional_packs is None:
            # TODO: get additional packs from ENV, command, config, etc.
            additional_packs = ["/Users/pwntester/src/github.com/github/codeql"]

        # A private scratch directory gives the server a fresh, collision-free
        # output path that does not exist until the server writes it.
        workdir = tempfile.mkdtemp(prefix="codeql_kernel_")
        bqrs_path = os.path.join(workdir, "results.bqrs")
        try:
            run_queries_params = {
                "body": {
                    "db": db_path,
                    "additionalPacks": list(additional_packs),
                    "externalInputs": [],
                    "singletonExternalInputs": [],  # opts.templateValues or {},
                    "outputPath": bqrs_path,
                    "queryPath": query_path,
                    "target": self._build_target(query_path, quick_eval),
                },
                "progressId": self.next_progress_id(),
            }

            (err, resp) = self._conn.request(
                "evaluation/runQuery", args=run_queries_params
            )

            if resp and resp["resultType"] != 0:
                message = resp.get("message") or (
                    f"Query failed with result type {resp['resultType']}"
                )
                return (message, None)
            if err:
                return (str(err), None)

            if not os.path.exists(bqrs_path):
                return (f"Failed to find results file at {bqrs_path}", "")

            (err, bqrs_info) = self._cli_client.bqrs_info(bqrs_path)
            if err:
                return (err, "")
            if not bqrs_info or not bqrs_info["result-sets"]:
                return ("Failed to get bqrs info", "")

            if self._row_count(bqrs_info["result-sets"]) > 0:
                return self._cli_client.bqrs_decode(bqrs_path)
            return (None, "No results")
        finally:
            # The caller only receives decoded text, so the BQRS can go.
            shutil.rmtree(workdir, ignore_errors=True)

Diff for: codeql_kernel/images/logo-32x32.png

7.71 KB
Loading

Diff for: codeql_kernel/images/logo-64x64.png

12.2 KB
Loading

0 commit comments

Comments
 (0)