diff --git a/scripts/to2.sh b/scripts/to2.sh new file mode 100755 index 00000000..911cfe2c --- /dev/null +++ b/scripts/to2.sh @@ -0,0 +1,61 @@ +source ./venv/bin/activate + +pushd src + +pip install future-fstrings 3to2 strip-hints +for file in $(find {lumigo_tracer,test} -type f); do + # don't use f-strings + future-fstrings-show "$file" > "$file.tmp"; + # add future print + sed -i '' '1s/^/from __future__ import print_function\ +/' "$file.tmp"; + # remove the typing imports + sed -i '' '/from typing import.*/d' "$file.tmp"; + # no type hints + strip-hints "$file.tmp" > "$file"; + rm "$file.tmp"; +done +# change imports, exceptions, bytes, class(object), etc. Don't change: print("a") -> print "a", str -> unicode. +3to2 ./ -n -w -x print -x str; +sleep 5; + +sed -i '' 's/urllib.request/urllib2/g' lumigo_tracer/utils.py; +sed -i '' 's/from collections.abc import Iterable/from collections import Iterable/g' lumigo_tracer/parsers/utils.py; +sed -i '' 's/except json.JSONDecodeError/except ValueError/g' lumigo_tracer/parsers/utils.py; +sed -i '' 's/900_000/900000/g' lumigo_tracer/utils.py; +sed -i '' 's/100_000/100000/g' lumigo_tracer/utils.py; +sed -i '' 's/FrameVariables = Dict\[str, str\]/ /g' lumigo_tracer/utils.py; +sed -i '' 's/frame_info.filename/frame_info[1]/g' lumigo_tracer/utils.py; +sed -i '' 's/frame_info.filename/frame_info[1]/g' test/unit/test_main_utils.py; +sed -i '' 's/frame_info.lineno/frame_info[2]/g' lumigo_tracer/utils.py; +sed -i '' 's/frame_info.lineno/frame_info[2]/g' test/unit/test_main_utils.py; +sed -i '' 's/frame_info.function/frame_info[3]/g' lumigo_tracer/utils.py; +sed -i '' 's/frame_info.function/frame_info[3]/g' test/unit/test_main_utils.py; +sed -i '' 's/frame_info.frame/frame_info[0]/g' lumigo_tracer/utils.py; +sed -i '' 's/frame_info.frame/frame_info[0]/g' test/unit/test_main_utils.py; +sed -i '' 's/urllib.parse/urllib/g' lumigo_tracer/parsers/utils.py; +sed -i '' 's/**self.lumigo_conf_kwargs,/**self.lumigo_conf_kwargs/g' 
lumigo_tracer/sync_http/sync_hook.py; +sed -i '' 's/**additional_info,/**additional_info/g' lumigo_tracer/spans_container.py; +sed -i '' 's/.hex()/.encode("hex")/g' lumigo_tracer/spans_container.py; +sed -i '' '/from __future__ import absolute_import/d' lumigo_tracer/libs/xmltodict.py; +sed -i '' 's/1_000_000/1000000/g' test/unit/test_main_utils.py; +sed -i '' 's/import urllib/import urllib2/g' test/unit/sync_http/test_sync_hook.py; +sed -i '' 's/import urllib23/import urllib3/g' test/unit/sync_http/test_sync_hook.py; +sed -i '' 's/RecursionError/RuntimeError/g' test/unit/test_main_utils.py; +# change backend to remove the next line +sed -i '' 's/FALLBACK_RUNTIME = "provided"/FALLBACK_RUNTIME = "pypy27 (python)"/g' lumigo_tracer/spans_container.py; + +popd + + +#brew install pypy +if [[ ! -d ./pypy_env ]]; then + echo "creating new virtualenv - pypy" + virtualenv -p pypy pypy_env +fi +source ./pypy_env/bin/activate + +pip install pytest capturer mock boto3 urllib3 +python setup.py develop + +py.test diff --git a/src/lumigo_tracer/libs/xmltodict.py b/src/lumigo_tracer/libs/xmltodict.py index d2622548..e27ece36 100644 --- a/src/lumigo_tracer/libs/xmltodict.py +++ b/src/lumigo_tracer/libs/xmltodict.py @@ -1,19 +1,3 @@ -#!/usr/bin/env python - - -""" -Copyright (C) 2012 Martin Blech and individual contributors. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -""" - - -"Makes working with XML feel like you are working with JSON" - try: from defusedexpat import pyexpat as expat except ImportError: @@ -40,11 +24,25 @@ except NameError: # pragma no cover _unicode = str + __author__ = "Martin Blech" __version__ = "0.11.0" __license__ = "MIT" +""" +Copyright (C) 2012 Martin Blech and individual contributors. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+""" + +"Makes working with XML feel like you are working with JSON" + + class ParsingInterrupted(Exception): pass diff --git a/src/lumigo_tracer/parsers/parser.py b/src/lumigo_tracer/parsers/parser.py index 119cc669..8f59a569 100644 --- a/src/lumigo_tracer/parsers/parser.py +++ b/src/lumigo_tracer/parsers/parser.py @@ -81,7 +81,7 @@ class ServerlessAWSParser(Parser): def parse_response(self, url: str, status_code: int, headers, body: bytes) -> dict: additional_info = {} - message_id = headers.get("x-amzn-RequestId") + message_id = headers.get("x-amzn-RequestId") or headers.get("x-amzn-requestid") if message_id and self.should_add_message_id: additional_info["info"] = {"messageId": message_id} span_id = headers.get("x-amzn-requestid") or headers.get("x-amz-requestid") @@ -216,7 +216,7 @@ def create_span(self, message_id: str) -> dict: def get_parser(url: str) -> Type[Parser]: - service = safe_split_get(url, ".", 0) + service = safe_split_get(str(url), ".", 0) if service == "dynamodb": return DynamoParser elif service == "sns": @@ -225,7 +225,7 @@ def get_parser(url: str) -> Type[Parser]: return LambdaParser elif service == "kinesis": return KinesisParser - elif safe_split_get(url, ".", 1) == "s3": + elif safe_split_get(str(url), ".", 1) == "s3": return S3Parser # SQS Legacy Endpoints: https://docs.aws.amazon.com/general/latest/gr/rande.html elif service in ("sqs", "sqs-fips") or "queue.amazonaws.com" in url: diff --git a/src/lumigo_tracer/spans_container.py b/src/lumigo_tracer/spans_container.py index 8183a5c4..b009f1a8 100644 --- a/src/lumigo_tracer/spans_container.py +++ b/src/lumigo_tracer/spans_container.py @@ -30,6 +30,7 @@ MAX_BODY_SIZE = 1024 # The buffer that we take before reaching timeout to send the traces to lumigo (seconds) TIMEOUT_BUFFER = 0.5 +FALLBACK_RUNTIME = "provided" class SpansContainer: @@ -73,6 +74,9 @@ def __init__( "event": event, "envs": envs, } + info: dict = {"logStreamName": log_stream_name, "logGroupName": log_group_name} + if 
trigger_by: + info.update(trigger_by) self.function_span = recursive_json_join( { "id": request_id, @@ -81,11 +85,7 @@ def __init__( "runtime": runtime, "memoryAllocated": memory_allocated, "readiness": "cold" if SpansContainer.is_cold else "warm", - "info": { - "logStreamName": log_stream_name, - "logGroupName": log_group_name, - **(trigger_by or {}), - }, + "info": info, }, self.base_msg, ) @@ -260,7 +260,7 @@ def create_span(cls, event=None, context=None, force=False) -> "SpansContainer": cls._span = SpansContainer( started=int(time.time() * 1000), name=os.environ.get("AWS_LAMBDA_FUNCTION_NAME"), - runtime=os.environ.get("AWS_EXECUTION_ENV"), + runtime=os.environ.get("AWS_EXECUTION_ENV", FALLBACK_RUNTIME), region=os.environ.get("AWS_REGION"), memory_allocated=os.environ.get("AWS_LAMBDA_FUNCTION_MEMORY_SIZE"), log_stream_name=os.environ.get("AWS_LAMBDA_LOG_STREAM_NAME"), diff --git a/src/lumigo_tracer/sync_http/sync_hook.py b/src/lumigo_tracer/sync_http/sync_hook.py index da60c6b2..b5fc7493 100644 --- a/src/lumigo_tracer/sync_http/sync_hook.py +++ b/src/lumigo_tracer/sync_http/sync_hook.py @@ -5,7 +5,6 @@ import os import builtins from functools import wraps -import importlib.util from lumigo_tracer.libs.wrapt import wrap_function_wrapper from lumigo_tracer.parsers.utils import safe_get_list @@ -15,6 +14,7 @@ get_logger, lumigo_safe_execute, is_aws_environment, + is_python_3, ) from lumigo_tracer.spans_container import SpansContainer, TimeoutMechanism from lumigo_tracer.parsers.http_data_classes import HttpRequest @@ -39,18 +39,24 @@ def _request_wrapper(func, instance, args, kwargs): args[0].seek(current_pos) host, method, headers, body, uri = ( - getattr(instance, "host", None), - getattr(instance, "_method", None), + str(getattr(instance, "host", "")), + str(getattr(instance, "_method", "")), None, None, None, ) with lumigo_safe_execute("parse request"): - if isinstance(data, bytes) and _BODY_HEADER_SPLITTER in data: + if isinstance(data, str) and 
_BODY_HEADER_SPLITTER in data: headers, body = data.split(_BODY_HEADER_SPLITTER, 1) if _FLAGS_HEADER_SPLITTER in headers: request_info, headers = headers.split(_FLAGS_HEADER_SPLITTER, 1) - headers = http.client.parse_headers(BytesIO(headers)) + if is_python_3(): + headers = http.client.parse_headers(BytesIO(headers)) + else: + import email + + message = email.message_from_file(BytesIO(headers)) + headers = {t[0]: t[1] for t in message.items()} path_and_query_params = ( # Parse path from request info, remove method (GET | POST) and http version (HTTP/1.1) request_info.decode("ascii") @@ -84,8 +90,8 @@ def _response_wrapper(func, instance, args, kwargs): """ ret_val = func(*args, **kwargs) with lumigo_safe_execute("parse response"): - headers = ret_val.headers - status_code = ret_val.code + headers = ret_val.headers if is_python_3() else ret_val.msg.dict + status_code = ret_val.code if is_python_3() else ret_val.status SpansContainer.get_span().update_event_response(instance.host, status_code, headers, b"") return ret_val @@ -98,7 +104,10 @@ def _read_wrapper(func, instance, args, kwargs): if ret_val: with lumigo_safe_execute("parse response.read"): SpansContainer.get_span().update_event_response( - None, instance.code, instance.headers, ret_val + None, + instance.code if is_python_3() else instance.status, + instance.headers if is_python_3() else instance.msg.dict, + ret_val, ) return ret_val @@ -253,12 +262,23 @@ def wrap_http_calls(): if not already_wrapped: with lumigo_safe_execute("wrap http calls"): get_logger().debug("wrapping the http request") - wrap_function_wrapper("http.client", "HTTPConnection.send", _request_wrapper) wrap_function_wrapper("botocore.awsrequest", "AWSRequest.__init__", _putheader_wrapper) - wrap_function_wrapper("http.client", "HTTPConnection.getresponse", _response_wrapper) - wrap_function_wrapper("http.client", "HTTPResponse.read", _read_wrapper) - if importlib.util.find_spec("urllib3"): + if is_python_3(): + 
wrap_function_wrapper("http.client", "HTTPConnection.send", _request_wrapper) + wrap_function_wrapper( + "http.client", "HTTPConnection.getresponse", _response_wrapper + ) + wrap_function_wrapper("http.client", "HTTPResponse.read", _read_wrapper) + else: + wrap_function_wrapper("httplib", "HTTPConnection.send", _request_wrapper) + wrap_function_wrapper("httplib", "HTTPConnection.getresponse", _response_wrapper) + wrap_function_wrapper("httplib", "HTTPResponse.read", _read_wrapper) + try: + import urllib3 # noqa + wrap_function_wrapper( "urllib3.response", "HTTPResponse.read_chunked", _read_stream_wrapper ) + except ImportError: + pass already_wrapped = True diff --git a/src/lumigo_tracer/utils.py b/src/lumigo_tracer/utils.py index 8e84636d..25f406c9 100644 --- a/src/lumigo_tracer/utils.py +++ b/src/lumigo_tracer/utils.py @@ -1,6 +1,8 @@ import json import logging import os +import sys + import time import urllib.request from urllib.error import URLError @@ -15,7 +17,7 @@ LUMIGO_EVENT_KEY = "_lumigo" STEP_FUNCTION_UID_KEY = "step_function_uid" MAX_SIZE_FOR_REQUEST: int = int(os.environ.get("LUMIGO_MAX_SIZE_FOR_REQUEST", 900_000)) -MAX_VARS_SIZE = 100000 +MAX_VARS_SIZE = 100_000 MAX_VAR_LEN = 200 MAX_ENTRY_SIZE = 1024 FrameVariables = Dict[str, str] @@ -166,6 +168,10 @@ def is_aws_environment(): return bool(os.environ.get("LAMBDA_RUNTIME_DIR")) +def is_python_3(): + return sys.version.startswith("3") + + def format_frames(frames_infos: List[inspect.FrameInfo]) -> List[dict]: free_space = MAX_VARS_SIZE frames: List[dict] = [] @@ -221,6 +227,11 @@ def prepare_large_data(value: Union[str, bytes, dict, None], max_size=MAX_ENTRY_ elif isinstance(value, bytes): try: value = value.decode() + except UnicodeDecodeError: + try: + value = repr(value) + except Exception: + pass except Exception: pass diff --git a/src/test/unit/parsers/test_parser.py b/src/test/unit/parsers/test_parser.py index d68c93ef..b30c1808 100644 --- a/src/test/unit/parsers/test_parser.py +++ 
b/src/test/unit/parsers/test_parser.py @@ -1,13 +1,26 @@ +import json + from lumigo_tracer.parsers.parser import ServerlessAWSParser, Parser -import http.client +from lumigo_tracer.parsers.http_data_classes import HttpRequest +from lumigo_tracer.utils import Configuration def test_serverless_aws_parser_fallback_doesnt_change(): url = "https://kvpuorrsqb.execute-api.us-west-2.amazonaws.com" - headers = http.client.HTTPMessage() - headers.add_header("nothing", "relevant") + headers = {"nothing": "relevant"} serverless_parser = ServerlessAWSParser().parse_response(url, 200, headers=headers, body=b"") root_parser = Parser().parse_response(url, 200, headers=headers, body=b"") serverless_parser.pop("ended") root_parser.pop("ended") assert serverless_parser == root_parser + + +def test_non_decodeable_body(monkeypatch): + """ + Note: this test may fail only in python2, where '\xff' is an encoded bytes, which can not be cast to any str. + """ + monkeypatch.setattr(Configuration, "verbose", True) + params = HttpRequest(host="a", method="b", uri="c", headers={}, body="\xff") + + return_json = Parser().parse_request(params) + assert json.dumps(return_json) diff --git a/src/test/unit/sync_http/test_sync_hook.py b/src/test/unit/sync_http/test_sync_hook.py index 1befba13..143ee8b2 100644 --- a/src/test/unit/sync_http/test_sync_hook.py +++ b/src/test/unit/sync_http/test_sync_hook.py @@ -6,7 +6,7 @@ import urllib from functools import wraps from io import BytesIO -from types import SimpleNamespace +from collections import namedtuple import logging import urllib3 @@ -14,11 +14,13 @@ from lumigo_tracer import lumigo_tracer, LumigoChalice from lumigo_tracer.parsers.parser import Parser import http.client -from lumigo_tracer.utils import Configuration, STEP_FUNCTION_UID_KEY, LUMIGO_EVENT_KEY +from lumigo_tracer.utils import Configuration, STEP_FUNCTION_UID_KEY, LUMIGO_EVENT_KEY, is_python_3 import pytest from lumigo_tracer.spans_container import SpansContainer +ContextClass = 
namedtuple("Context", "aws_request_id") + def test_lambda_wrapper_basic_events(reporter_mock): """ @@ -87,6 +89,15 @@ def lambda_test_function(): assert "Content-Length" in http_spans[0]["info"]["httpInfo"]["request"]["headers"] +def test_lambda_wrapper_non_encodeable_request(): + @lumigo_tracer(token="123") + def lambda_test_function(): + http.client.HTTPConnection("www.google.com").request("POST", "/", "\xff") + + lambda_test_function() + assert json.dumps(SpansContainer.get_span().http_spans) + + def test_lambda_wrapper_query_with_http_params(): @lumigo_tracer(token="123") def lambda_test_function(): @@ -195,10 +206,11 @@ def lambda_test_function(event, context): return 1 with CaptureOutput() as capturer: - assert lambda_test_function({}, SimpleNamespace(aws_request_id="1234")) == 1 + assert lambda_test_function({}, ContextClass(aws_request_id="1234")) == 1 assert Configuration.enhanced_print is True - assert "RequestId: 1234 hello" in capturer.get_lines() - assert "RequestId: 1234 world" in capturer.get_lines() + lines = capturer.get_lines() + assert any(l.startswith(u"RequestId: 1234") and "hello" in l for l in lines) + assert any(l.startswith(u"RequestId: 1234") and "world" in l for l in lines) def test_wrapping_without_print_override(): @@ -208,7 +220,7 @@ def lambda_test_function(event, context): return 1 with CaptureOutput() as capturer: - assert lambda_test_function({}, SimpleNamespace(aws_request_id="1234")) == 1 + assert lambda_test_function({}, ContextClass(aws_request_id="1234")) == 1 assert Configuration.enhanced_print is False assert any(line == "hello" for line in capturer.get_lines()) @@ -243,6 +255,7 @@ def lambda_test_function(): assert caplog.records[-1].msg == "An exception occurred in lumigo's code add request event" +@pytest.mark.skipif(not is_python_3(), reason="chalice is for python 3") def test_lumigo_chalice(): class App: @property @@ -268,6 +281,7 @@ def __call__(self, *args, **kwargs): assert SpansContainer._span 
+@pytest.mark.skipif(not is_python_3(), reason="chalice is for python 3") def test_lumigo_chalice_create_extra_lambdas(monkeypatch): # mimic aws env monkeypatch.setitem(os.environ, "LAMBDA_RUNTIME_DIR", "true") @@ -314,10 +328,13 @@ def lambda_test_function(event, context): logging.warning("hello\nworld") return 1 - assert lambda_test_function({}, SimpleNamespace(aws_request_id="1234")) == 1 + assert lambda_test_function({}, ContextClass(aws_request_id="1234")) == 1 assert Configuration.enhanced_print is True - assert "RequestId: 1234 test_sync_hook.py" in caplog.text - assert "WARNING hello\nRequestId: 1234 world" in caplog.text + lines = caplog.text.splitlines() + assert any( + l.startswith("RequestId: 1234") and "test_sync_hook.py" in l and "hello" in l for l in lines + ) + assert any(l.startswith("RequestId: 1234") and "world" in l for l in lines) def test_wrapping_with_logging_exception(caplog): @@ -333,13 +350,13 @@ def lambda_test_function(event, context): logger.exception("hello") return 1 - assert lambda_test_function({}, SimpleNamespace(aws_request_id="1234")) == 1 + assert lambda_test_function({}, ContextClass(aws_request_id="1234")) == 1 # Check all lines have exactly one RequestId. for line in caplog.text.splitlines(): assert line.startswith("RequestId: 1234") and line.count("RequestId: 1234") == 1 # Check the message was logged. 
test_message = [line for line in caplog.text.splitlines() if line.endswith("hello")][0] - assert test_message.replace(" ", "").endswith("ERRORhello") + assert "ERROR" in test_message def test_wrapping_with_logging_override_complex_usage(): @@ -356,7 +373,7 @@ def lambda_test_function(event, context): return 1 with CaptureOutput() as capturer: - assert lambda_test_function({}, SimpleNamespace(aws_request_id="1234")) == 1 + assert lambda_test_function({}, ContextClass(aws_request_id="1234")) == 1 assert Configuration.enhanced_print is True assert "RequestId: 1234 my_test [INFO] hello" in capturer.get_lines() assert "RequestId: 1234 world" in capturer.get_lines() @@ -368,9 +385,9 @@ def lambda_test_function(event, context): logging.warning("hello\nworld") return 1 - assert lambda_test_function({}, SimpleNamespace(aws_request_id="1234")) == 1 + assert lambda_test_function({}, ContextClass(aws_request_id="1234")) == 1 assert Configuration.enhanced_print is False - assert " WARNING hello\nworld" in caplog.text + assert "WARNING" in caplog.text and "hello\nworld" in caplog.text def test_wrapping_urlib_stream_get(): diff --git a/src/test/unit/test_main_utils.py b/src/test/unit/test_main_utils.py index 7bc4f795..68a34c06 100644 --- a/src/test/unit/test_main_utils.py +++ b/src/test/unit/test_main_utils.py @@ -1,4 +1,6 @@ import inspect +import sys + import pytest from lumigo_tracer.utils import ( _create_request_body, @@ -146,7 +148,7 @@ def func(): def test_format_frames__pass_max_vars_size(): def func(): for i in range(MAX_VARS_SIZE * 2): - exec(f"a{i} = 'A'") + exec(f"a{i} = 'A'") in globals(), locals() 1 / 0 try: @@ -194,7 +196,7 @@ def func(): ("a" * 21, "a" * 20 + "...[too long]"), ({"a": "a"}, '{"a": "a"}'), # dict. # dict that can't be converted to json. - ({"a": set()}, "{'a': set()}"), # type: ignore + ({"a": sys}, "{'a':