Skip to content

Commit 7dde52c

Browse files
authored
Merge pull request #7131 from RasmusWL/wsgiref.simple_server
Python: Model `wsgiref.simple_server` applications
2 parents 2a5e0a3 + e265259 commit 7dde52c

File tree

7 files changed

+315
-0
lines changed

7 files changed

+315
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.

python/ql/lib/semmle/python/frameworks/Stdlib.qll

+157
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ private import semmle.python.dataflow.new.RemoteFlowSources
1010
private import semmle.python.Concepts
1111
private import semmle.python.ApiGraphs
1212
private import semmle.python.frameworks.PEP249
13+
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
14+
private import semmle.python.frameworks.internal.SelfRefMixin
1315
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
1416

1517
/** Provides models for the Python standard library. */
@@ -1349,6 +1351,161 @@ private module StdlibPrivate {
13491351
}
13501352
}
13511353

1354+
// ---------------------------------------------------------------------------
1355+
// wsgiref.simple_server
1356+
// ---------------------------------------------------------------------------
1357+
/** Provides models for the `wsgiref.simple_server` module. */
1358+
private module WsgirefSimpleServer {
1359+
class WsgiServerSubclass extends Class, SelfRefMixin {
1360+
WsgiServerSubclass() {
1361+
this.getABase() =
1362+
API::moduleImport("wsgiref")
1363+
.getMember("simple_server")
1364+
.getMember("WSGIServer")
1365+
.getASubclass*()
1366+
.getAUse()
1367+
.asExpr()
1368+
}
1369+
}
1370+
1371+
/**
1372+
* A function that was passed to the `set_app` method of a
1373+
* `wsgiref.simple_server.WSGIServer` instance.
1374+
*
1375+
* See https://docs.python.org/3.10/library/wsgiref.html#wsgiref.simple_server.WSGIServer.set_app
1376+
*
1377+
* See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L137
1378+
* for how a request is processed and given to an application.
1379+
*/
1380+
class WsgirefSimpleServerApplication extends HTTP::Server::RequestHandler::Range {
1381+
WsgirefSimpleServerApplication() {
1382+
exists(DataFlow::Node appArg, DataFlow::CallCfgNode setAppCall |
1383+
(
1384+
setAppCall =
1385+
API::moduleImport("wsgiref")
1386+
.getMember("simple_server")
1387+
.getMember("WSGIServer")
1388+
.getASubclass*()
1389+
.getReturn()
1390+
.getMember("set_app")
1391+
.getACall()
1392+
or
1393+
setAppCall
1394+
.(DataFlow::MethodCallNode)
1395+
.calls(any(WsgiServerSubclass cls).getASelfRef(), "set_app")
1396+
) and
1397+
appArg in [setAppCall.getArg(0), setAppCall.getArgByName("application")]
1398+
|
1399+
appArg = poorMansFunctionTracker(this)
1400+
)
1401+
}
1402+
1403+
override Parameter getARoutedParameter() { none() }
1404+
1405+
override string getFramework() { result = "Stdlib: wsgiref.simple_server application" }
1406+
}
1407+
1408+
/**
1409+
* The parameter of a `WsgirefSimpleServerApplication` that takes the WSGI environment
1410+
* when processing a request.
1411+
*
1412+
* See https://docs.python.org/3.10/library/wsgiref.html#wsgiref.simple_server.WSGIRequestHandler.get_environ
1413+
*/
1414+
class WSGIEnvirontParameter extends RemoteFlowSource::Range, DataFlow::ParameterNode {
1415+
WSGIEnvirontParameter() {
1416+
exists(WsgirefSimpleServerApplication func |
1417+
if func.isMethod()
1418+
then this.getParameter() = func.getArg(1)
1419+
else this.getParameter() = func.getArg(0)
1420+
)
1421+
}
1422+
1423+
override string getSourceType() {
1424+
result = "Stdlib: wsgiref.simple_server application: WSGI environment parameter"
1425+
}
1426+
}
1427+
1428+
/**
1429+
* Gets a reference to the parameter of a `WsgirefSimpleServerApplication` that
1430+
* takes the `start_response` function.
1431+
*
1432+
* See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L225-L252
1433+
*/
1434+
private DataFlow::TypeTrackingNode startResponse(DataFlow::TypeTracker t) {
1435+
t.start() and
1436+
exists(WsgirefSimpleServerApplication func |
1437+
if func.isMethod()
1438+
then result.(DataFlow::ParameterNode).getParameter() = func.getArg(2)
1439+
else result.(DataFlow::ParameterNode).getParameter() = func.getArg(1)
1440+
)
1441+
or
1442+
exists(DataFlow::TypeTracker t2 | result = startResponse(t2).track(t2, t))
1443+
}
1444+
1445+
/**
1446+
* Gets a reference to the parameter of a `WsgirefSimpleServerApplication` that
1447+
* takes the `start_response` function.
1448+
*
1449+
* See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L225-L252
1450+
*/
1451+
DataFlow::Node startResponse() { startResponse(DataFlow::TypeTracker::end()).flowsTo(result) }
1452+
1453+
/**
1454+
* Gets a reference to the `write` function (that will write data to the response),
1455+
* which is the return value from calling the `start_response` function.
1456+
*
1457+
* See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L225-L252
1458+
*/
1459+
private DataFlow::TypeTrackingNode writeFunction(DataFlow::TypeTracker t) {
1460+
t.start() and
1461+
result.(DataFlow::CallCfgNode).getFunction() = startResponse()
1462+
or
1463+
exists(DataFlow::TypeTracker t2 | result = writeFunction(t2).track(t2, t))
1464+
}
1465+
1466+
/**
1467+
* Gets a reference to the `write` function (that will write data to the response),
1468+
* which is the return value from calling the `start_response` function.
1469+
*
1470+
* See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L225-L252
1471+
*/
1472+
DataFlow::Node writeFunction() { writeFunction(DataFlow::TypeTracker::end()).flowsTo(result) }
1473+
1474+
/**
1475+
* A call to the `write` function.
1476+
*
1477+
* See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L276
1478+
*/
1479+
class WsgirefSimpleServerApplicationWriteCall extends HTTP::Server::HttpResponse::Range,
1480+
DataFlow::CallCfgNode {
1481+
WsgirefSimpleServerApplicationWriteCall() { this.getFunction() = writeFunction() }
1482+
1483+
override DataFlow::Node getBody() { result in [this.getArg(0), this.getArgByName("data")] }
1484+
1485+
override DataFlow::Node getMimetypeOrContentTypeArg() { none() }
1486+
1487+
override string getMimetypeDefault() { none() }
1488+
}
1489+
1490+
/**
1491+
* A return from a `WsgirefSimpleServerApplication`, which is included in the response body.
1492+
*/
1493+
class WsgirefSimpleServerApplicationReturn extends HTTP::Server::HttpResponse::Range,
1494+
DataFlow::CfgNode {
1495+
WsgirefSimpleServerApplicationReturn() {
1496+
exists(WsgirefSimpleServerApplication requestHandler |
1497+
node = requestHandler.getAReturnValueFlowNode()
1498+
)
1499+
}
1500+
1501+
override DataFlow::Node getBody() { result = this }
1502+
1503+
override DataFlow::Node getMimetypeOrContentTypeArg() { none() }
1504+
1505+
override string getMimetypeDefault() { none() }
1506+
}
1507+
}
1508+
13521509
// ---------------------------------------------------------------------------
13531510
// sqlite3
13541511
// ---------------------------------------------------------------------------

python/ql/lib/semmle/python/frameworks/internal/PoorMansFunctionResolution.qll

+29
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,22 @@ private DataFlow::TypeTrackingNode poorMansFunctionTracker(DataFlow::TypeTracker
6262
exists(DataFlow::TypeTracker t2 | result = poorMansFunctionTracker(t2, func).track(t2, t))
6363
}
6464

65+
/**
66+
* Gets a reference to `func`. `func` must be defined inside a class, and the reference
67+
* will be inside a method of the same class (possibly `func` itself).
68+
*/
69+
private DataFlow::Node getSimpleMethodReferenceWithinClass(Function func) {
70+
// TODO: Should take MRO into account
71+
exists(Class cls, Function otherFunc, DataFlow::Node selfRefOtherFunc |
72+
pragma[only_bind_into](cls).getAMethod() = func and
73+
pragma[only_bind_into](cls).getAMethod() = otherFunc
74+
|
75+
selfRefOtherFunc.getALocalSource().(DataFlow::ParameterNode).getParameter() =
76+
otherFunc.getArg(0) and
77+
result.(DataFlow::AttrRead).accesses(selfRefOtherFunc, func.getName())
78+
)
79+
}
80+
6581
/**
6682
* INTERNAL: Do not use.
6783
*
@@ -80,7 +96,20 @@ private DataFlow::TypeTrackingNode poorMansFunctionTracker(DataFlow::TypeTracker
8096
* inst = MyClass()
8197
* print(inst.my_method)
8298
* ```
99+
*
100+
* But it is able to handle simple method calls within a class (but does not take MRO into
101+
* account).
102+
* ```py
103+
* class MyClass:
104+
* def method1(self):
105+
* pass
106+
*
107+
* def method2(self):
108+
* self.method1()
109+
* ```
83110
*/
84111
DataFlow::Node poorMansFunctionTracker(Function func) {
85112
poorMansFunctionTracker(DataFlow::TypeTracker::end(), func).flowsTo(result)
113+
or
114+
result = getSimpleMethodReferenceWithinClass(func)
86115
}

python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.expected

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
private import python
2+
private import semmle.python.dataflow.new.DataFlow
3+
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
4+
import TestUtilities.InlineExpectationsTest
5+
6+
class InlinePoorMansFunctionResolutionTest extends InlineExpectationsTest {
7+
InlinePoorMansFunctionResolutionTest() { this = "InlinePoorMansFunctionResolutionTest" }
8+
9+
override string getARelevantTag() { result = "resolved" }
10+
11+
override predicate hasActualResult(Location location, string element, string tag, string value) {
12+
exists(location.getFile().getRelativePath()) and
13+
exists(Function func, DataFlow::Node ref |
14+
ref = poorMansFunctionTracker(func) and
15+
not ref.asExpr() instanceof FunctionExpr and
16+
// exclude things like `GSSA variable func`
17+
exists(ref.asExpr()) and
18+
// exclude decorator calls (which, with our extractor rewrites, do reference the
19+
// function)
20+
not ref.asExpr() = func.getDefinition().(FunctionExpr).getADecoratorCall()
21+
|
22+
value = func.getName() and
23+
tag = "resolved" and
24+
element = ref.toString() and
25+
location = ref.getLocation()
26+
)
27+
}
28+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
def func():
2+
print("func")
3+
4+
func() # $ resolved=func
5+
6+
7+
class MyBase:
8+
def base_method(self):
9+
print("base_method", self)
10+
11+
12+
class MyClass(MyBase):
13+
def method1(self):
14+
print("method1", self)
15+
16+
@classmethod
17+
def cls_method(cls):
18+
print("cls_method", cls)
19+
20+
@staticmethod
21+
def static():
22+
print("static")
23+
24+
def method2(self):
25+
print("method2", self)
26+
self.method1() # $ resolved=method1
27+
self.base_method()
28+
self.cls_method() # $ resolved=cls_method
29+
self.static() # $ resolved=static
30+
31+
32+
33+
34+
MyClass.cls_method() # $ resolved=cls_method
35+
MyClass.static() # $ resolved=static
36+
37+
x = MyClass()
38+
x.base_method()
39+
x.method1()
40+
x.cls_method()
41+
x.static()
42+
x.method2()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# This test file demonstrates how to use an application with a wsgiref.simple_server
2+
# see https://docs.python.org/3/library/wsgiref.html#wsgiref.simple_server.WSGIServer
3+
import sys
4+
import wsgiref.simple_server
5+
6+
def ignore(*arg, **kwargs): pass
7+
ensure_tainted = ensure_not_tainted = ignore
8+
9+
ADDRESS = ("localhost", 8000)
10+
11+
12+
# I wanted to showcase that we handle both functions and bound-methods, so it's possible
13+
# to run this test-file in 3 different ways (selected by the first command-line argument).
14+
15+
def func(environ, start_response): # $ requestHandler
16+
ensure_tainted(
17+
environ, # $ tainted
18+
environ["PATH_INFO"], # $ tainted
19+
)
20+
write = start_response("200 OK", [("Content-Type", "text/plain")])
21+
write(b"hello") # $ HttpResponse responseBody=b"hello"
22+
write(data=b" ") # $ HttpResponse responseBody=b" "
23+
24+
# function return value should be an iterable that will also be written to the
25+
# response.
26+
return [b"world", b"!"] # $ HttpResponse responseBody=List
27+
28+
29+
class MyServer(wsgiref.simple_server.WSGIServer):
30+
def __init__(self):
31+
super().__init__(ADDRESS, wsgiref.simple_server.WSGIRequestHandler)
32+
self.set_app(self.my_method)
33+
34+
def my_method(self, _env, start_response): # $ requestHandler
35+
start_response("200 OK", [])
36+
return [b"my_method"] # $ HttpResponse responseBody=List
37+
38+
39+
case = sys.argv[1]
40+
if case == "1":
41+
server = wsgiref.simple_server.WSGIServer(ADDRESS, wsgiref.simple_server.WSGIRequestHandler)
42+
server.set_app(func)
43+
elif case == "2":
44+
server = MyServer()
45+
elif case == "3":
46+
server = MyServer()
47+
def func3(_env, start_response): # $ requestHandler
48+
start_response("200 OK", [])
49+
return [b"foo"] # $ HttpResponse responseBody=List
50+
server.set_app(func3)
51+
else:
52+
sys.exit("wrong case")
53+
54+
55+
print(f"Running on http://{ADDRESS[0]}:{ADDRESS[1]}")
56+
57+
server.serve_forever()

0 commit comments

Comments
 (0)