Skip to content

Commit 7311c6a

Browse files
authored
X86 segment reg override (#512)
* arch: Add segment_override attribute to Instruction. * arch: Update x86 decoder with respect to the default segment register.
1 parent d9255fa commit 7311c6a

File tree

3 files changed

+42
-16
lines changed

3 files changed

+42
-16
lines changed

include/remill/Arch/Instruction.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,12 @@ class Instruction {
215215
// Is this instruction decoded within the context of a delay slot?
216216
bool in_delay_slot;
217217

218+
// For x86 it is possible to specify a prefix that overrides the default
219+
// segment register. This attribute by itself is currently not used directly
220+
// by the lifter - it is expected that `Operand`s will include the segment reg where appropriate
221+
// but it can be used in different applications.
222+
const Register *segment_override = nullptr;
223+
218224
enum Category {
219225
kCategoryInvalid,
220226
kCategoryNormal,

lib/Arch/Instruction.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,10 @@ std::string Instruction::Serialize(void) const {
692692
ss << " " << function;
693693
}
694694

695+
if (segment_override) {
696+
ss << "(SEGMENT_OVERRIDE " << segment_override->name << ")";
697+
}
698+
695699
for (const auto &op : operands) {
696700
ss << " " << op.Serialize();
697701
}

lib/Arch/X86/Arch.cpp

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,6 @@ static void DecodeMemory(Instruction &inst, const xed_decoded_inst_t *xedd,
424424
auto iform = xed_decoded_inst_get_iform_enum(xedd);
425425
auto iclass = xed_decoded_inst_get_iclass(xedd);
426426
auto op_name = xed_operand_name(xedo);
427-
auto segment = xed_decoded_inst_get_seg_reg(xedd, mem_index);
428427
auto base = xed_decoded_inst_get_base_reg(xedd, mem_index);
429428
auto index = xed_decoded_inst_get_index_reg(xedd, mem_index);
430429
auto disp = xed_decoded_inst_get_memory_displacement(xedd, mem_index);
@@ -438,24 +437,34 @@ static void DecodeMemory(Instruction &inst, const xed_decoded_inst_t *xedd,
438437
size = 16;
439438
}
440439

441-
// Deduce the implicit segment register if it is absent.
442-
if (XED_REG_INVALID == segment) {
443-
segment = XED_REG_DS;
440+
auto raw_segment_reg = xed_decoded_inst_get_seg_reg(xedd, mem_index);
441+
auto deduce_segment = [&](auto segment_reg) {
442+
// Deduce the implicit segment register if it is absent.
443+
if (XED_REG_INVALID != segment_reg) {
444+
return segment_reg;
445+
}
444446
if (XED_REG_RSP == base_wide || XED_REG_RBP == base_wide) {
445-
segment = XED_REG_SS;
447+
return XED_REG_SS;
448+
}
449+
return XED_REG_DS;
450+
};
451+
auto ignore_segment = [&](auto segment_reg) {
452+
// On AMD64, only the `FS` and `GS` segments are non-zero.
453+
if (Is64Bit(inst.arch_name) && XED_REG_FS != segment_reg &&
454+
XED_REG_GS != segment_reg) {
455+
return XED_REG_INVALID;
446456
}
447-
}
448457

449-
// On AMD64, only the `FS` and `GS` segments are non-zero.
450-
if (Is64Bit(inst.arch_name) && XED_REG_FS != segment &&
451-
XED_REG_GS != segment) {
452-
segment = XED_REG_INVALID;
458+
// AGEN operands, e.g. for the `LEA` instruction, can be marked with an
459+
// explicit segment, but it is ignored.
460+
if (XED_OPERAND_AGEN == op_name) {
461+
return XED_REG_INVALID;
462+
}
453463

454-
// AGEN operands, e.g. for the `LEA` instruction, can be marked with an
455-
// explicit segment, but it is ignored.
456-
} else if (XED_OPERAND_AGEN == op_name) {
457-
segment = XED_REG_INVALID;
458-
}
464+
// No need to ignore it
465+
return segment_reg;
466+
};
467+
auto segment_reg = ignore_segment(deduce_segment(raw_segment_reg));
459468

460469
// Special case: `POP [xSP + ...]` uses the value of `xSP` after incrementing
461470
// it by the stack width.
@@ -470,7 +479,7 @@ static void DecodeMemory(Instruction &inst, const xed_decoded_inst_t *xedd,
470479
op.addr.address_size =
471480
xed_decoded_inst_get_memop_address_width(xedd, mem_index);
472481

473-
op.addr.segment_base_reg = SegBaseRegOp(segment, op.addr.address_size);
482+
op.addr.segment_base_reg = SegBaseRegOp(segment_reg, op.addr.address_size);
474483
op.addr.base_reg = RegOp(base);
475484
op.addr.index_reg = RegOp(index);
476485
op.addr.scale = XED_REG_INVALID != index ? static_cast<int64_t>(scale) : 0;
@@ -987,6 +996,13 @@ bool X86Arch::DecodeInstruction(uint64_t address, std::string_view inst_bytes,
987996
// instruction implementation.
988997
auto xedi = xed_decoded_inst_inst(xedd);
989998
auto num_operands = xed_decoded_inst_noperands(xedd);
999+
1000+
auto xedv = xed_decoded_inst_operands_const(xedd);
1001+
if (xed_operand_values_has_segment_prefix(xedv)) {
1002+
auto reg_name = xed_reg_enum_t2str(xed_operand_values_segment_prefix(xedv));
1003+
inst.segment_override = RegisterByName(reg_name);
1004+
}
1005+
9901006
for (auto i = 0U; i < num_operands; ++i) {
9911007
auto xedo = xed_inst_operand(xedi, i);
9921008
if (XED_OPVIS_SUPPRESSED != xed_operand_operand_visibility(xedo)) {

0 commit comments

Comments
 (0)