问题
For a reverse-engineering project I want to split an asm file produced by IDA Pro into multiple per-segment files, which must still build into binary-identical executables.
"Binary-identical" means 100% equal to me: segment starts, ordering, sizes, opcodes, etc. The source comes from a reversed exe and only some of the symbols are detected by IDA (mostly just variable or function offsets), so I can't rely on the assembler's own ordering of segments, symbols, etc. It needs to be 100% exact, or else I introduce unfindable errors into the reversed code. It is OK if the assembler acts stupid, as long as the result is equal.
im using masm and the old microsoft linker:
ml.exe: Microsoft (R) Macro Assembler Version 14.16.27032.1, from VS2017 Community edition
link.exe: Microsoft (R) Segmented Executable Linker Version 5.60.339 Dec 5 1994, latest 16bit version
MASM generates different code for multi.exe if I leave out the .model directive.
single.asm
; single.asm - reference build: all four segments are defined in this one
; module, in the order seg000 (data), seg001 (entry code), seg002 (print
; routine), seg003 (stack).
.model medium
.386
; seg000: holds the message string.
seg000 segment para public use16
text db 'Hello World!',0ah,0dh,'$' ; message, then LF, CR; '$' terminates it for the print call
seg000 ends
; seg001: program entry point.
seg001 segment para public use16
start proc
mov ax,seg seg000 ; AX = segment address of seg000
mov ds,ax ; DS now addresses the message
push ax ; push/pop pair leaves AX unchanged;
pop ax ; its purpose is not evident from this file
call print ; print is declared as a far procedure in seg002
mov ax,4c00h ; AH = 4Ch (DOS exit), AL = 00h (exit code)
int 21h
start endp
seg001 ends
; seg002: far routine that prints the message.
seg002 segment para public use16
print proc far
mov dx,offset text ; DX = offset of the message; the caller has already set DS
mov ah,09h ; AH = 09h: DOS print string (DS:DX -> '$'-terminated string)
int 21h
retf
print endp
seg002 ends
; seg003: stack segment, 256 uninitialized bytes.
seg003 segment para use16 stack
db 256 dup (?)
seg003 ends
end start ; 'start' is the program entry point
Multi-file asm version (based on help from user rkhb)
segments.inc
; segments.inc - empty declarations of all four segments, in the same order
; and with the same attributes as in single.asm. Each seg00N.asm includes
; this file before reopening its own segment by name only.
seg000 segment para public use16
seg000 ends
seg001 segment para public use16
seg001 ends
seg002 segment para public use16
seg002 ends
seg003 segment para use16 stack
seg003 ends
seg000.asm
; seg000.asm - module that supplies the contents of seg000 (the message).
.model medium
.386
include segments.inc ; declares seg000..seg003 with their attributes
public text ; exported so that seg002.asm can reference it via 'extern text:BYTE'
seg000 segment
text db 'Hello World!',0ah,0dh,'$' ; message, then LF, CR; '$' terminates it for the print call
seg000 ends
end
seg001.asm
; seg001.asm - module that supplies the contents of seg001 (entry point).
.model medium
.386
include segments.inc ; declares seg000..seg003 with their attributes
extern print:FAR ; defined as 'print proc far' in seg002.asm
seg001 segment
start proc
mov ax, seg000 ; AX = segment address of seg000 (single.asm writes this as 'seg seg000')
mov ds,ax ; DS now addresses the message
push ax ; push/pop pair leaves AX unchanged;
pop ax ; its purpose is not evident from this file
call print
mov ax,4c00h ; AH = 4Ch (DOS exit), AL = 00h (exit code)
int 21h
start endp
seg001 ends
end start ; 'start' is the program entry point
seg002.asm
; seg002.asm - module that supplies the contents of seg002 (print routine).
.model medium
.386
include segments.inc ; declares seg000..seg003 with their attributes
extern text:BYTE ; defined and made public in seg000.asm
seg002 segment
print proc far
mov dx,offset text ; DX = offset of the message; the caller has already set DS
mov ah,09h ; AH = 09h: DOS print string (DS:DX -> '$'-terminated string)
int 21h
retf
print endp
seg002 ends
end
seg003.asm
; seg003.asm - module that supplies the contents of seg003 (the stack).
.model medium
.386
include segments.inc ; declares seg000..seg003 with their attributes
seg003 segment
db 256 dup (?) ; 256 uninitialized bytes of stack space
seg003 ends
end
build_single.cmd
rem Assemble single.asm to an OMF object without linking (/c = assemble only).
ml.exe /c /omf single.asm
rem Link with the 16-bit linker and request a map file; the trailing commas
rem leave the remaining linker fields at their defaults.
link.exe /MAP single.obj,,,,,
build_multi.cmd
rem Assemble each per-segment module to its own OMF object (/c = assemble only).
ml.exe /c /omf seg000.asm
ml.exe /c /omf seg001.asm
ml.exe /c /omf seg002.asm
ml.exe /c /omf seg003.asm
rem Link the objects in segment order into multi.exe and write multi.map.
rem NOTE(review): /L presumably abbreviates /LINENUMBERS - confirm against the
rem LINK 5.60 documentation; build_single.cmd does not pass it.
link.exe /MAP /L seg000.obj seg001.obj seg002.obj seg003.obj,multi.exe,multi.map,,,
Running build_single and build_multi produces nearly identical executables, but multi.exe is 10 bytes larger (zero-filled at the end).
hex-diff: left is single.exe
Comparing the IDA Pro results shows that only the alignment padding before the stack segment differs: in single.exe the padding is part of the uninitialized area, whereas in multi.exe it is a zero-filled part of the image.
single.ida.asm
.286
.model medium
; ===========================================================================
; Segment type: Pure data
dseg segment para public 'DATA'
assume cs:dseg
aHelloWorld db 'Hello World!',0Ah
db 0Dh,'$',0
dseg ends
; ===========================================================================
; Segment type: Pure code
seg001 segment byte public 'CODE'
assume cs:seg001
assume es:nothing, ss:seg003, ds:nothing
; =============== S U B R O U T I N E =======================================
; Attributes: noreturn
public start
start proc near
mov ax, seg dseg
mov ds, ax
assume ds:dseg
push ax
pop ax
call sub_10030
mov ax, 4C00h
int 21h ; DOS - 2+ - QUIT WITH EXIT CODE (EXIT)
start endp ; AL = exit code
; ---------------------------------------------------------------------------
align 10h
seg001 ends
; ===========================================================================
; Segment type: Pure code
seg002 segment byte public 'CODE'
assume cs:seg002
assume es:nothing, ss:nothing, ds:dseg
; =============== S U B R O U T I N E =======================================
sub_10030 proc far ; CODE XREF: start+7P
mov dx, 0
mov ah, 9
int 21h ; DOS - PRINT STRING
; DS:DX -> string terminated by "$"
retf
sub_10030 endp
; ---------------------------------------------------------------------------
db 8 dup(?) ; !!!DIFFERENCE!!!
seg002 ends
; ===========================================================================
; Segment type: Uninitialized
seg003 segment byte stack 'STACK'
assume cs:seg003
assume es:nothing, ss:nothing, ds:dseg
db 100h dup(?)
seg003 ends
end start
multi.ida.asm
.286
.model medium
; ===========================================================================
; Segment type: Pure data
dseg segment para public 'DATA'
assume cs:dseg
aHelloWorld db 'Hello World!',0Ah
db 0Dh,'$',0
dseg ends
; ===========================================================================
; Segment type: Pure code
seg001 segment byte public 'CODE'
assume cs:seg001
assume es:nothing, ss:seg003, ds:nothing
; =============== S U B R O U T I N E =======================================
; Attributes: noreturn
public start
start proc near
mov ax, seg dseg
mov ds, ax
assume ds:dseg
push ax
pop ax
call sub_10030
mov ax, 4C00h
int 21h ; DOS - 2+ - QUIT WITH EXIT CODE (EXIT)
start endp ; AL = exit code
; ---------------------------------------------------------------------------
align 10h
seg001 ends
; ===========================================================================
; Segment type: Pure code
seg002 segment byte public 'CODE'
assume cs:seg002
assume es:nothing, ss:nothing, ds:dseg
; =============== S U B R O U T I N E =======================================
sub_10030 proc far ; CODE XREF: start+7P
mov dx, 0
mov ah, 9
int 21h ; DOS - PRINT STRING
; DS:DX -> string terminated by "$"
retf
sub_10030 endp
; ---------------------------------------------------------------------------
align 10h ; !!!DIFFERENCE!!!
seg002 ends
; ===========================================================================
; Segment type: Uninitialized
seg003 segment byte stack 'STACK'
assume cs:seg003
assume es:nothing, ss:nothing, ds:dseg
db 100h dup(?)
seg003 ends
end start
the difference is:
db 8 dup(?)
and
align 10h
Any ideas how to get multi.exe binary-identical to single.exe?
The result seems to be linker-related:
link.exe: the described 8 byte difference
wlink.exe: the described 8 byte difference (added 'STACK' to stack segment or else ss:sp=0:0 in exe header)
optlink.exe: 100% equal
ulink.exe: bug with SP not correctly set (already reported)
IDA shows that there is an "align 10h" at the end of seg002 in both single.obj and (multi) seg002.obj, so the difference in the exe image must be introduced by the linker — or is it?
The images become identical if I force the stack to be part of the exe image by using "db 256 dup (0)" in the seg003 stack segment; in that case it no longer makes sense for the linker to ignore the alignment in multi.exe. But that result is not 100% identical to the original unless I also change the original (single.asm).
来源:https://stackoverflow.com/questions/57911151/how-can-i-get-the-dos-exe-build-from-segments-in-multiple-asm-files-binary-ident