Skip to content

Commit 5807efe

Browse files
committed
Merge branch 'master' of github.com:vmprof/vmprof-python
2 parents ef3c38a + 5f62ab1 commit 5807efe

File tree

8 files changed

+108
-56
lines changed

8 files changed

+108
-56
lines changed

setup.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@
2525
extra_compile_args = ['-Wno-unused']
2626
extra_compile_args += ['-DVMPROF_APPLE=1']
2727
extra_compile_args += ['-DVMPROF_UNIX=1']
28+
# overwrite the optimization level, if it is not optimized enough,
29+
# it might use the register rbx...
30+
extra_compile_args += ['-g']
31+
extra_compile_args += ['-O2']
2832
elif sys.platform.startswith('linux'):
2933
libraries = ['dl','unwind']
3034
extra_compile_args = ['-Wno-unused']
@@ -74,7 +78,7 @@
7478
name='vmprof',
7579
author='vmprof team',
7680
author_email='[email protected]',
77-
version="0.4.0.dev9",
81+
version="0.4.0.dev10",
7882
packages=find_packages(),
7983
description="Python's vmprof client",
8084
long_description='See https://vmprof.readthedocs.org/',

src/_vmprof.c

+15-3
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,29 @@ static destructor Original_code_dealloc = 0;
2727
PyObject* (*_default_eval_loop)(PyFrameObject *, int) = 0;
2828

2929
#ifdef VMPROF_UNIX
30-
#ifdef __GNUC__
31-
__attribute__((optimize("O1")))
32-
#elif defined(__clang__)
30+
#ifdef __clang__
3331
__attribute__((disable_tail_calls))
32+
#elif defined(__GNUC__)
33+
__attribute__((optimize("O1")))
3434
#endif
3535
PyObject* vmprof_eval(PyFrameObject *f, int throwflag)
3636
{
37+
#ifdef X86_64
3738
register PyFrameObject * callee_saved asm("rbx");
39+
#elif defined(X86_32)
40+
register PyFrameObject * callee_saved asm("edi");
41+
#else
42+
# error "platform not supported"
43+
#endif
3844

3945
asm volatile(
46+
#ifdef X86_64
4047
"movq %1, %0\t\n"
48+
#elif defined(X86_32)
49+
"mov %1, %0\t\n"
50+
#else
51+
# error "platform not supported"
52+
#endif
4153
: "=r" (callee_saved)
4254
: "r" (f) );
4355
return _default_eval_loop(f, throwflag);

src/_vmprof.h

+6
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ PyObject* vmprof_eval(PyFrameObject *f, int throwflag);
4949
#define VMP_SUPPORTS_NATIVE_PROFILING
5050
#endif
5151

52+
#ifdef __x86_64__
53+
#define X86_64
54+
#elif defined(__i386__)
55+
#define X86_32
56+
#endif
57+
5258
#define MARKER_STACKTRACE '\x01'
5359
#define MARKER_VIRTUAL_IP '\x02'
5460
#define MARKER_TRAILER '\x03'

src/stack.c

+6-4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
#ifdef VMP_SUPPORTS_NATIVE_PROFILING
1616
#define UNW_LOCAL_ONLY
1717
#include <libunwind.h>
18+
# ifdef X86_64
19+
# define REG_RBX UNW_X86_64_RBX
20+
# elif defined(X86_32)
21+
# define REG_RBX UNW_X86_EDI
22+
# endif
1823
#endif
1924

2025
#ifdef __APPLE__
@@ -161,12 +166,9 @@ int vmp_walk_and_record_stack(PY_STACK_FRAME_T *frame, void ** result,
161166
if ((void*)pip.start_ip == (void*)vmprof_eval) {
162167
// yes we found one stack entry of the python frames!
163168
unw_word_t rbx = 0;
164-
if (unw_get_reg(&cursor, UNW_X86_64_RBX, &rbx) < 0) {
169+
if (unw_get_reg(&cursor, REG_RBX, &rbx) < 0) {
165170
break;
166171
}
167-
if (top_most_frame == NULL) {
168-
top_most_frame = (PyFrameObject*)rbx;
169-
}
170172
if (rbx != (unw_word_t)top_most_frame) {
171173
// uh we are screwed! the ip indicates we have context
172174
// to a PyEval_EvalFrameEx function, but when we tried to retrieve

src/trampoline.c

+71-46
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
#include "trampoline.h"
22

33
#include "machine.h"
4+
#include "_vmprof.h"
45

5-
#define _GNU_SOURCE
6+
#define _GNU_SOURCE 1
67
#include <string.h>
78
#include <stdio.h>
89
#include <stdlib.h>
@@ -59,17 +60,11 @@ static char * g_trampoline = NULL;
5960
// the machine code size copied over from the callee
6061
static int g_trampoline_length;
6162

62-
void _jmp_to(char * a, uintptr_t addr, int call) {
63-
64-
// TODO 32-bit
65-
63+
int _jmp_to(char * a, uintptr_t addr) {
64+
#ifdef X86_64
6665
// movabsq <addr>, <reg>
6766
a[0] = 0x48; // REX.W
68-
if (call) {
69-
a[1] = 0xb8; // %rax
70-
} else {
71-
a[1] = 0xba; // %rdx
72-
}
67+
a[1] = 0xba; // %rdx
7368
a[2] = addr & 0xff;
7469
a[3] = (addr >> 8) & 0xff;
7570
a[4] = (addr >> 16) & 0xff;
@@ -79,21 +74,67 @@ void _jmp_to(char * a, uintptr_t addr, int call) {
7974
a[8] = (addr >> 48) & 0xff;
8075
a[9] = (addr >> 56) & 0xff;
8176

82-
if (call) {
83-
a[10] = 0xff;
84-
a[11] = 0xd0;
85-
} else {
86-
a[10] = 0xff;
87-
a[11] = 0xe2;
77+
// jmp *%rdx
78+
a[10] = 0xff;
79+
a[11] = 0xe2;
80+
return 12;
81+
#elif defined(X86_32)
82+
// mov <addr>, %edx
83+
a[0] = 0xba;
84+
a[1] = addr & 0xff;
85+
a[2] = (addr >> 8) & 0xff;
86+
a[3] = (addr >> 16) & 0xff;
87+
a[4] = (addr >> 24) & 0xff;
88+
// jmp %edx
89+
a[5] = 0xff;
90+
a[6] = 0xe2;
91+
return 7;
92+
#endif
93+
return 0;
94+
}
95+
96+
#ifdef X86_32
97+
int patch_relative_call(void * base, char * rel_call, char *rel_call_end, int bytes_after) {
98+
if (bytes_after != 0) {
99+
return 0;
88100
}
101+
102+
char * r = rel_call+1;
103+
104+
int off = r[0] |
105+
((r[1] & 0xff) << 8) |
106+
((r[2] & 0xff) << 16) |
107+
((r[3] & 0xff) << 24);
108+
// instruction pointer is just after the whole instruction
109+
intptr_t addr = (intptr_t)base + 5 + off;
110+
111+
rel_call[0] = 0xb8;
112+
rel_call[1] = addr & 0xff;
113+
rel_call[2] = (addr >> 8) & 0xff;
114+
rel_call[3] = (addr >> 16) & 0xff;
115+
rel_call[4] = (addr >> 24) & 0xff;
116+
// call *%eax (0xb8 above loads the absolute target into %eax)
117+
rel_call[5] = 0xff;
118+
rel_call[6] = 0xd0;
119+
120+
return 2;
89121
}
122+
#endif
90123

91124
// a hilarious typo, tramp -> trump :)
92125
int _redirect_trampoline_and_back(char * eval, char * trump, char * vmprof_eval) {
93126

94127
char * trump_first_byte = trump;
128+
#ifdef X86_64
95129
int needed_bytes = 12;
130+
#elif defined(X86_32)
131+
int needed_bytes = 8;
132+
int relative_call_at_pos = -1;
133+
#else
134+
# error "platform not supported"
135+
#endif
96136
int bytes = 0;
137+
int off = 0;
97138
char * ptr = eval;
98139

99140
// 1) copy the instructions that should be redone in the trampoline
@@ -102,6 +143,12 @@ int _redirect_trampoline_and_back(char * eval, char * trump, char * vmprof_eval)
102143
if (res == 0) {
103144
return 1;
104145
}
146+
#ifdef X86_32
147+
if (ptr[0] == '\xe8') {
148+
// a relative call (opcode 0xe8); this occurs on 32-bit Linux
149+
relative_call_at_pos = bytes;
150+
}
151+
#endif
105152
bytes += res;
106153
ptr += res;
107154
}
@@ -110,40 +157,18 @@ int _redirect_trampoline_and_back(char * eval, char * trump, char * vmprof_eval)
110157
// 2) initiate the first few instructions of the eval loop
111158
{
112159
(void)memcpy(trump, eval, bytes);
113-
_jmp_to(trump+bytes, (uintptr_t)eval+bytes, 0);
114-
//char * wptr = trump;
115-
//*wptr++ = 0x55;
116-
117-
//*wptr++ = 0x48;
118-
//*wptr++ = 0x89;
119-
//*wptr++ = 0xe5;
120-
121-
//*wptr++ = 0x53;
122-
//*wptr++ = 0x53;
123-
124-
//*wptr++ = 0x48;
125-
//*wptr++ = 0x89;
126-
//*wptr++ = 0xfb;
127-
128-
//char * trampcall = wptr;
129-
//wptr += 12;
130-
131-
//// pop
132-
//*wptr++ = 0x5b;
133-
//*wptr++ = 0x5b;
134-
//*wptr++ = 0x5d;
135-
//*wptr++ = 0xc3;
136-
137-
//_jmp_to(trampcall, (uintptr_t)wptr, 1);
138-
139-
//(void)memcpy(wptr, eval, bytes);
140-
//wptr += bytes;
141-
//_jmp_to(wptr, (uintptr_t)eval+bytes, 0);
160+
#ifdef X86_32
161+
if (relative_call_at_pos != -1) {
162+
off = patch_relative_call(eval+relative_call_at_pos, trump+relative_call_at_pos,
163+
trump+relative_call_at_pos+5, bytes-relative_call_at_pos-5);
164+
}
165+
#endif
166+
_jmp_to(trump+bytes+off, (uintptr_t)eval+bytes);
142167
}
143168

144169
// 3) overwrite the first few bytes of callee to jump to tramp
145170
// callee must call back
146-
_jmp_to(eval, (uintptr_t)vmprof_eval, 0);
171+
_jmp_to(eval, (uintptr_t)vmprof_eval);
147172

148173
return 0;
149174
}

travis/script.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ fi
99
py.test vmprof/ -vrs
1010
py.test jitlog/ -vrs
1111

12-
if [[ -n "TRAVIS_TAG" && "$BUILD_LINUX_WHEEL" == "1" ]]; then
12+
if [[ -n "$TRAVIS_TAG" && "$BUILD_LINUX_WHEEL" == "1" ]]; then
1313
docker pull quay.io/pypa/manylinux1_x86_64
1414
docker pull quay.io/pypa/manylinux1_i686
1515
docker run --rm -v `pwd`:/io:Z quay.io/pypa/manylinux1_x86_64 bash /io/travis/build-wheels.sh;

vmprof/test/test_c_symboltable.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class TestSymbolTable(object):
1515
def setup_class(cls):
1616
stack_ffi = FFI()
1717
stack_ffi.cdef("""
18-
void dump_all_known_symbols(int fd);
18+
//void dump_all_known_symbols(int fd);
1919
int test_extract(char ** name, int * lineno, char ** src);
2020
""")
2121
with open("src/symboltable.c", "rb") as fd:

vmprof/test/test_run.py

+3
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,9 @@ def walk(parent):
367367
if parent is None or len(parent.children) == 0:
368368
return False
369369

370+
if 'n:native_gzipgzipgzip:' in parent.name:
371+
return True
372+
370373
for child in parent.children.values():
371374
if 'n:native_gzipgzipgzip:' in child.name:
372375
p = float(child.count) / parent.count

0 commit comments

Comments
 (0)