How can I get a DOS exe built from segments in multiple asm files to be binary identical to the single asm file version?

喜夏-厌秋 提交于 2019-12-11 07:36:15

问题


For a reverse engineering project I want to split an IDA Pro produced asm file into multiple segment files, which must result in binary identical executables.

Binary equal means 100% equal to me: segment starts, ordering, sizes, opcodes, etc. Because the code comes from a reversed exe and IDA detects only some of the symbols (mostly just variable or function offsets), I can't rely on the assembler's own ordering of segments, symbols, etc. It needs to be 100% exact, or else I introduce unfindable errors into the reversed code. It is OK if the assembler acts stupid, as long as the result is equal.

im using masm and the old microsoft linker:

ml.exe: Microsoft (R) Macro Assembler Version 14.16.27032.1, from VS2017 Community edition
link.exe: Microsoft (R) Segmented Executable Linker  Version 5.60.339 Dec  5 1994, latest 16bit version

masm generates different code for multi.exe if i leave out the .model directive

single.asm

; single.asm - reference build: all four segments live in one source file.
.model medium
.386

; seg000: data segment holding the '$'-terminated message that print outputs.
seg000 segment para public use16 
text db 'Hello World!',0ah,0dh,'$'
seg000 ends

; seg001: code segment containing the program entry point.
seg001 segment para public use16 

; start: points ds at seg000, calls print, then terminates via int 21h.
start proc
  ; ax = segment value of seg000 (where text lives)
  mov ax,seg seg000
  mov ds,ax
  ; push/pop pair leaves ax unchanged; its purpose is not stated in the source
  push ax
  pop ax
  call print
  ; ah=4ch, al=00h: DOS "quit with exit code", exit code 0
  mov ax,4c00h
  int 21h
start endp

seg001 ends

; seg002: second code segment, holds the far routine called from start.
seg002 segment para public use16

; print: far procedure; prints the string at ds:text through DOS (int 21h, ah=09h).
print proc far
  mov dx,offset text
  mov ah,09h
  int 21h
  retf
print endp

seg002 ends

; seg003: stack segment, 256 uninitialized bytes.
seg003 segment para use16 stack
  db 256 dup (?)
seg003 ends

; start is the program entry point.
end start

multi asm file version (based on help from user rkhb)

segments.inc

; segments.inc - empty declarations of all four segments with their full
; attributes, in the same order as in single.asm. Every segNNN.asm includes
; this file and later reopens its own segment by name only.
; NOTE(review): presumably this is what keeps the segment order identical in
; every object file - confirm against the linker map.
seg000 segment para public use16 
seg000 ends

seg001 segment para public use16 
seg001 ends

seg002 segment para public use16
seg002 ends

; stack segment (no 'public' combine type; 'stack' is used instead)
seg003 segment para use16 stack
seg003 ends

seg000.asm

; seg000.asm - data segment of the multi-file build.
.model medium
.386
include segments.inc

; text is exported so that seg002.asm can reference it via 'extern text:BYTE'.
public text

; Reopens seg000 (attributes come from segments.inc) and defines the
; '$'-terminated message.
seg000 segment
  text db 'Hello World!',0ah,0dh,'$'
seg000 ends

; no entry point in this module
end

seg001.asm

; seg001.asm - code segment with the program entry point (multi-file build).
.model medium
.386
include segments.inc

; print is a far procedure defined in seg002.asm.
extern print:FAR

seg001 segment

; start: points ds at seg000, calls print, then terminates via int 21h.
start proc
  ; ax = segment value of seg000; single.asm spells this 'seg seg000'
  mov ax, seg000
  mov ds,ax
  ; push/pop pair leaves ax unchanged; its purpose is not stated in the source
  push ax
  pop ax
  call print
  ; ah=4ch, al=00h: DOS "quit with exit code", exit code 0
  mov ax,4c00h
  int 21h
start endp

seg001 ends

; start is the program entry point.
end start

seg002.asm

; seg002.asm - second code segment, holds the far print routine (multi-file build).
.model medium
.386
include segments.inc

; text is a byte variable defined (and declared public) in seg000.asm.
extern text:BYTE

seg002 segment

; print: far procedure; prints the string at ds:text through DOS (int 21h, ah=09h).
; NOTE(review): there is no explicit 'public print' in this file; seg001.asm's
; 'extern print:FAR' presumably resolves through MASM's default PROC
; visibility - confirm.
print proc far
   mov dx,offset text
   mov ah,09h
   int 21h
   retf
print endp

seg002 ends

; no entry point in this module
end

seg003.asm

; seg003.asm - stack segment of the multi-file build.
.model medium
.386
include segments.inc

; Reopens seg003 (declared as a stack segment in segments.inc) and reserves
; 256 uninitialized bytes.
seg003 segment
  db 256 dup (?)
seg003 ends

; no entry point in this module
end

build_single.cmd

rem Assemble single.asm to an OMF object file without linking (/c = assemble only).
ml.exe /c /omf single.asm
rem Link with the 16-bit linker; /MAP requests a map file, the remaining
rem comma-separated fields are left empty.
link.exe /MAP single.obj,,,,,

build_multi.cmd

rem Assemble each segment file separately to an OMF object file (/c = assemble only).
ml.exe /c /omf seg000.asm
ml.exe /c /omf seg001.asm
ml.exe /c /omf seg002.asm
ml.exe /c /omf seg003.asm
rem Link the objects in segment order into multi.exe and write multi.map.
rem NOTE(review): /L is presumably the abbreviated /LINENUMBERS switch - confirm
rem against the LINK 5.60 documentation.
link.exe /MAP /L seg000.obj seg001.obj seg002.obj seg003.obj,multi.exe,multi.map,,,

Calling build_single and build_multi results in nearly equal executables, but multi.exe is 10 bytes larger (filled with 0 at the end).

hex-diff: left is single.exe

Comparing the IDA Pro results shows that only the segment alignment before the stack segment is different (in single.exe the alignment is part of the uninitialized area; in multi.exe it is a 0-filled part of the image).

single.ida.asm

    ; IDA Pro disassembly of single.exe. The line marked !!!DIFFERENCE!!! at the
    ; end of seg002 is the only place where this listing differs from the
    ; multi.exe listing: here the padding before the stack segment is
    ; uninitialized ('db 8 dup(?)').
    .286
    .model medium

; ===========================================================================

; Segment type: Pure data
dseg    segment para public 'DATA'
    assume cs:dseg
aHelloWorld db 'Hello World!',0Ah
    db 0Dh,'$',0
dseg    ends

; ===========================================================================

; Segment type: Pure code
seg001    segment byte public 'CODE'
    assume cs:seg001
    assume es:nothing, ss:seg003, ds:nothing

; =============== S U B R O U T I N E =======================================

; Attributes: noreturn

    public start
start   proc near
    mov ax, seg dseg
    mov ds, ax
    assume ds:dseg
    push  ax
    pop ax
    call  sub_10030
    mov ax, 4C00h
    int 21h   ; DOS - 2+ - QUIT WITH EXIT CODE (EXIT)
start   endp      ; AL = exit code

; ---------------------------------------------------------------------------
    align 10h
seg001    ends

; ===========================================================================

; Segment type: Pure code
seg002    segment byte public 'CODE'
    assume cs:seg002
    assume es:nothing, ss:nothing, ds:dseg

; =============== S U B R O U T I N E =======================================


sub_10030 proc far    ; CODE XREF: start+7P
    mov dx, 0
    mov ah, 9
    int 21h   ; DOS - PRINT STRING
          ; DS:DX -> string terminated by "$"
    retf
sub_10030 endp

; ---------------------------------------------------------------------------
    db 8 dup(?) ; !!!DIFFERENCE!!!
seg002    ends

; ===========================================================================

; Segment type: Uninitialized
seg003    segment byte stack 'STACK'
    assume cs:seg003
    assume es:nothing, ss:nothing, ds:dseg
    db 100h dup(?)
seg003    ends


    end start

multi.ida.asm

    ; IDA Pro disassembly of multi.exe. The line marked !!!DIFFERENCE!!! at the
    ; end of seg002 is the only place where this listing differs from the
    ; single.exe listing: here the padding before the stack segment shows up as
    ; 'align 10h', i.e. it is part of the file image.
    .286
    .model medium

; ===========================================================================

; Segment type: Pure data
dseg    segment para public 'DATA'
    assume cs:dseg
aHelloWorld db 'Hello World!',0Ah
    db 0Dh,'$',0
dseg    ends

; ===========================================================================

; Segment type: Pure code
seg001    segment byte public 'CODE'
    assume cs:seg001
    assume es:nothing, ss:seg003, ds:nothing

; =============== S U B R O U T I N E =======================================

; Attributes: noreturn

    public start
start   proc near
    mov ax, seg dseg
    mov ds, ax
    assume ds:dseg
    push  ax
    pop ax
    call  sub_10030
    mov ax, 4C00h
    int 21h   ; DOS - 2+ - QUIT WITH EXIT CODE (EXIT)
start   endp      ; AL = exit code

; ---------------------------------------------------------------------------
    align 10h
seg001    ends

; ===========================================================================

; Segment type: Pure code
seg002    segment byte public 'CODE'
    assume cs:seg002
    assume es:nothing, ss:nothing, ds:dseg

; =============== S U B R O U T I N E =======================================


sub_10030 proc far    ; CODE XREF: start+7P
    mov dx, 0
    mov ah, 9
    int 21h   ; DOS - PRINT STRING
          ; DS:DX -> string terminated by "$"
    retf
sub_10030 endp

; ---------------------------------------------------------------------------
    align 10h ; !!!DIFFERENCE!!!
seg002    ends

; ===========================================================================

; Segment type: Uninitialized
seg003    segment byte stack 'STACK'
    assume cs:seg003
    assume es:nothing, ss:nothing, ds:dseg
    db 100h dup(?)
seg003    ends


    end start

the difference is:

db 8 dup(?)

and

align 10h

any ideas how to get multi.exe binary equal to single.exe?

the result seems to be linker related

link.exe: the described 8 byte difference
wlink.exe: the described 8 byte difference (added 'STACK' to stack segment or else ss:sp=0:0 in exe header)
optlink.exe: 100% equal
ulink.exe: bug with SP not correctly set (already reported)

IDA shows that there is an "align 10h" at the end of seg002 in both single.obj and (multi) seg002.obj, so the difference in the exe image seems to be introduced by the linker - or is it?

The images become identical if I force the stack to become part of the exe image with "db 256 dup (0)" in the seg003 stack segment; then it no longer makes sense for the linker to ignore the align in multi.exe. But that result is not 100% identical to the original unless I also change the original (single.asm).

来源:https://stackoverflow.com/questions/57911151/how-can-i-get-the-dos-exe-build-from-segments-in-multiple-asm-files-binary-ident

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!