Hello,
While testing the CMPXCHG instruction, I noticed a semantic mismatch in the destination operand behavior.
For CMPXCHG, Intel documents the operation as:
(* Accumulator = AL, AX, EAX, or RAX depending on whether a byte, word, doubleword, or quadword comparison is being performed *)
TEMP := DEST
IF accumulator = TEMP
THEN
ZF := 1;
DEST := SRC;
ELSE
ZF := 0;
accumulator := TEMP;
DEST := TEMP;
FI;
However, with rax=0xffffffffffffffff, rbx=0 and mem=0xffffffffffffffff, unicorn currently computes the wrong value for rax:
Here the accumulator is eax and TEMP is the low 32 bits of mem, so accumulator = TEMP.
The comparison itself is handled correctly, as zf=1.
In this branch the accumulator should not be changed; however, unicorn mutates RAX (its upper 32 bits are cleared).
cmpxchg dword ptr [r8], ebx
initial rax=0xffffffffffffffff rbx=0x0000000000000000 mem=0xffffffffffffffff
actual: rax=0x00000000ffffffff mem=0xffffffff00000000 zf=1
expected: rax=0xffffffffffffffff mem=0xffffffff00000000 zf=1
You should be able to reproduce this with the following program and script.
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
/*
 * Native reproducer: executes `cmpxchg dword ptr [r8], ebx` with
 * rax = all ones, rbx = 0 and an all-ones 64-bit memory operand, then prints
 * the resulting RAX, memory value and ZF next to the expected values.
 */
int main(void) {
    enum { ZF_BIT = 6 }; /* bit of the saved flags that is printed as zf */

    uint64_t mem = UINT64_MAX;
    uint64_t rax_after;
    uint64_t saved_flags;

    asm volatile(
        "mov %[ptr], %%r8\n\t"       /* r8 = &mem */
        "mov $-1, %%rax\n\t"         /* rax = 0xffffffffffffffff */
        "xor %%ebx, %%ebx\n\t"       /* rbx = 0 */
        "cmpxchgl %%ebx, (%%r8)\n\t" /* instruction under test */
        "pushfq\n\t"                 /* capture flags right after cmpxchg */
        "pop %[flags]\n\t"
        "mov %%rax, %[rax]\n\t"      /* capture the resulting rax */
        : [rax] "=r"(rax_after), [flags] "=r"(saved_flags), "+m"(mem)
        : [ptr] "r"(&mem)
        : "rax", "rbx", "r8", "cc", "memory");

    const uint64_t zf = (saved_flags >> ZF_BIT) & 1;

    printf("cmpxchg dword ptr [r8], ebx\n");
    printf("initial rax=0xffffffffffffffff rbx=0x0000000000000000 mem=0xffffffffffffffff\n");
    printf("actual: rax=0x%016" PRIx64 " mem=0x%016" PRIx64 " zf=%" PRIu64
           "\n",
           rax_after, mem, zf);
    printf("expected: rax=0xffffffffffffffff mem=0xffffffff00000000 zf=1\n");

    return 0;
}
from unicorn import *
from unicorn.x86_const import *
# Reproducer: emulate a single `cmpxchg dword ptr [r8], ebx` in Unicorn and
# print the resulting RAX, memory value and ZF next to the expected values.
CODE = b"\x41\x0f\xb1\x18"  # cmpxchg dword ptr [r8], ebx
CODE_ADDRESS = 0x1000000
DATA_ADDRESS = 0x2000000
MAP_SIZE = 2 * 1024 * 1024  # size of each mapped region
ALL_ONES = 0xFFFFFFFFFFFFFFFF  # initial value of RAX and of the memory operand
ZF_BIT = 6  # bit of RFLAGS that is printed as zf

print("cmpxchg dword ptr [r8], ebx")
try:
    mu = Uc(UC_ARCH_X86, UC_MODE_64)

    # Map one region for the code and one for the data operand.
    for base in (CODE_ADDRESS, DATA_ADDRESS):
        mu.mem_map(base, MAP_SIZE)

    mu.mem_write(CODE_ADDRESS, CODE)
    mu.mem_write(DATA_ADDRESS, ALL_ONES.to_bytes(8, "little"))

    # Initial register state: r8 -> data, rax = all ones, rbx = 0.
    for reg, value in (
        (UC_X86_REG_R8, DATA_ADDRESS),
        (UC_X86_REG_RAX, ALL_ONES),
        (UC_X86_REG_RBX, 0),
    ):
        mu.reg_write(reg, value)

    mu.emu_start(CODE_ADDRESS, CODE_ADDRESS + len(CODE))

    # Collect the state after the instruction has run.
    rax = mu.reg_read(UC_X86_REG_RAX)
    flags = mu.reg_read(UC_X86_REG_RFLAGS)
    mem = int.from_bytes(mu.mem_read(DATA_ADDRESS, 8), "little")
    zf = (flags >> ZF_BIT) & 1

    print("initial rax=0xffffffffffffffff rbx=0x0000000000000000 mem=0xffffffffffffffff")
    print("actual: rax=0x%016x mem=0x%016x zf=%d" % (rax, mem, zf))
    print("expected: rax=0xffffffffffffffff mem=0xffffffff00000000 zf=1")
except UcError as e:
    print("ERROR: %s" % e)
This was tested with unicorn 2.1.4 (pip) on Fedora Linux.
Hello,
While testing the CMPXCHG instruction, I noticed a semantic mismatch in the destination operand behavior.
For CMPXCHG, Intel documents the operation as:
However, with rax=0xffffffffffffffff, rbx=0 and mem=0xffffffffffffffff, the current behavior in unicorn miscalculates rax:
We have accumulator=eax and TEMP=mem so accumulator = TEMP.
This is expected as zf=1.
In this branch accumulator should not be changed, however, unicorn mutates RAX.
You should be able to reproduce this with the following program and script.
This was tested with unicorn 2.1.4 (pip) on Fedora Linux.