Weird Errors While Reading Disk

此生再无相见时 提交于 2019-12-24 07:31:16

问题


So, I've been working on a hobby project: creating my own operating system. I started a while back but dropped it until a couple of nights ago. I just fixed an oversight that caused nothing to be read from the sectors I wanted to read from. With that error out of the way, a new one has come about, and I honestly don't even know where to begin debugging this one.

I am coding a Master Boot Record and debugging it with GDB and QEMU, here is the code to my master boot record (It was assembled using YASM)

Sorry if my code is not very good. I am not an expert at assembly language...

; yasm boot.asm -fbin

bits 16                     ; Assemble 16-bit code

; part(n,l):  open section n with vstart=l, so labels in that section are
;             computed as if the section were located at address l.
; rpart(n,l): open section n with start=l and no vstart; the code below adds
;             0x7c00 or 0x600 by hand to every label taken from such a section.
%define part(n,l) section n vstart=l align=1
%define rpart(n,l) section n start=l align=1

; ----------------------- ;
part(entry, 0x7c00)       ;
; --ENTRY---------------- ;

; Entry code (section assembled with vstart=0x7c00). Saves DL, zeroes the
; segment registers, copies 512 bytes from _start to _strap (0x0600) and then
; far-jumps into the copy.
_start:
    mov [boot_drive+0x7c00], dl     ; Save DL in boot_drive.
                                    ; NOTE(review): DS is only loaded three lines
                                    ; further down, so this store uses whatever
                                    ; DS held on entry.

    xor ax, ax
    mov ss, ax                      ; SS = DS = ES = 0
    mov ds, ax
    mov es, ax
    mov sp, _start                  ; NOTE(review): SP is not set directly after SS
    mov bp, _start

    mov cx, 512                     ; Byte count for the copy
    mov si, _start                  ; Source      = DS:SI
    mov di, _strap                  ; Destination = ES:DI

    rep movsb                       ; NOTE(review): no CLD before this string
                                    ; instruction; direction depends on the
                                    ; incoming DF

    ; b_boot_strapper-$$ is the size of this entry section. It is added because
    ; the strap code sits after the entry code inside the copied 512 bytes.
    jmp 0:_strap+(b_boot_strapper-$$)

b_boot_strapper:                    ; Marks the end of the entry section
; ----------------------- ;
part(strap, 0x0600)       ;
; --BOOT STRAPPER-------- ;

; Relocated part of the MBR: looks for a partition entry with bit 7 set in its
; first byte, reads from disk with int 13h/AH=42h using the DAP, and jumps to
; 0:0x7c00. Prints a message and halts when no entry matches or the read fails.
_strap:
    xor cx, cx  ; CL = number of partition entries tested so far
    .find_active_part:
        cmp cl, 4
        jge .no_active_part             ; All 4 entries tested without a match

        xor ax, ax
        mov ah, cl
        mov bl, 16
        mul bl                          ; AX = AL * BL.
                                        ; NOTE(review): the index was placed in
                                        ; AH and AL is 0, so AX is 0 on every
                                        ; pass and only the first entry is
                                        ; ever examined.

        mov bx, ax                      ; BX = byte offset used to index the table

        inc cl

        mov al, (1 << 7)
        mov ah, [partition_1+0x600+bx]  ; First byte of the entry, relocated copy
        and ah, al                      ; Keep only bit 7

        jnz .load_active_part           ; Bit 7 set: treat the entry as active
        jmp .find_active_part

    .load_active_part:
        xor ax, ax
        mov ds, ax

        mov ah, 42h                     ; int 13h function number
        mov dl, [boot_drive+0x600]      ; Drive number saved by _start
        mov si, dap+0x600               ; DS:SI = DAP in the relocated copy
        push bx
        mov bx, dap+0x600
        mov es, bx                      ; NOTE(review): ES receives the DAP's
                                        ; offset value as a segment; the DAP
                                        ; itself asks for segment 0
        pop bx

        mov cx, [partition_1+0x600+bx+8]    ; Low 16 bits of the entry's dword at +8
        mov [dap_startlba+0x600], cx        ; NOTE(review): upper 16 bits not copied
        mov cx, [partition_1+0x600+bx+12]   ; Low 16 bits of the entry's dword at +12
        mov [dap_sectors+0x600], cx         ; Used as the number of sectors to read

        int 13h

        jc .disk_error                  ; CF set: report a read error

        xor ax, ax                      ; Reset segments and stack before the jump
        mov ds, ax
        mov es, ax
        mov ss, ax
        mov sp, _start
        mov bp, _start


        mov dl, [boot_drive+0x600]      ; Reload the saved drive number into DL
        jmp 0:0x7c00                    ; Address given in dap_offset/dap_segment

    .no_active_part:
        ; NOTE(review): msg_no_part/msg_er_read are labels from the vstart=0x600
        ; section, but that section follows the entry code in the copied image,
        ; so these addresses may be low by the size of the entry section. Confirm.
        mov si, msg_no_part

        call print
        jmp halt

    .disk_error:
        mov si, msg_er_read

        call print
        jmp halt

    ; print: load bytes from DS:SI with LODSB and pass each one to int 10h with
    ; AH=0Eh, BH=0, BL=0Fh until a zero byte is read.
    print:
        mov dx, ax          ; Copies AX into DX; DX is not read again in this routine

        mov ah, 0Eh
        xor bh, bh
        mov bl, 0Fh

        .rep:
            lodsb           ; AL = next byte of the string, SI advances
            or al, al       ; Sets ZF when AL is 0
            jz .done
            int 10h
            jmp .rep

        .done:
            ret

    ; halt: disable interrupts and execute HLT in an endless loop.
    halt:
        cli
        hlt
        jmp halt            ; Loops back through CLI if HLT ever returns

msg_er_read db 'Disk Read Error....', 0
msg_no_part db 'No Active Partition....', 0

; ----------------------- ;
rpart(variables, 300)     ;
; --VARIABLES------------ ;

; This section is opened with start= only, so the code adds 0x7c00 or 0x600 to
; these labels by hand before using them.
boot_drive db 0             ; Receives DL in _start

dap: ; Disk Address Packet
    db 16, 0                ; First two bytes of the packet: 16, 0
    dap_sectors  dw 0       ; Filled at run time from the partition entry (+12)
    dap_offset   dw 0x7c00  ; Offset part of the address jumped to after the read
    dap_segment  dw 0       ; Segment part of that address
    dap_startlba dq 0       ; Low word filled at run time from the entry (+8)
dap_end:

; ----------------------- ;
rpart(partitions, 446)    ;
; --VARIABLES------------ ;

; Four 16-byte partition entries, each supplied by an included file.
partition_1: ; This file has the following 16 bytes:
; 0x80, 0x01, 0x00, 0x05, 0x17, 0x01, 0x03, 0x01, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00
%include "part_n1.asm"
partition_2: ; The rest of these files are just 16 null bytes.
%include "part_n2.asm"
partition_3:
%include "part_n3.asm"
partition_4:
%include "part_n4.asm"

; ------------------------------- ;
rpart(signature, 510)             ;
db 0x55, 0xAA                     ;
; ------------------------------- ;

This code works! However, I don't know if this is an issue with QEMU or not but when it reads from the sectors it has a bit of corruption or dataloss...

These are the bytes that were expected to be at 0x7c00

EB 1B B4 0E 30 FF B3 0F
AC 74 04 CD 10 EB F9 C3 
48 65 6C 6C 6F 20 57 6F 
72 6C 64 21 00 BE 10 7C 
E8 DF FF F4 

(It's a basic function that prints "Hello World!")

This is what ended up actually being in memory at that location:

EB 1B B4 0E 30 FF B3 0F 
AC 74 04 CD 10 EB F9 C3 
48 65 6C 6C 6F 20 57 6F 
72 6C 64 21 00 BE 10 7C 
F0 DF FF F4

If you look closely the 4th byte from the last was changed from E8 to F0, I have no idea why this happened. And in the last run the "E" in "Hello World" was also changed but it wasn't in this debug run.

I need help with even where to begin debugging this...


Edit 1

I realized that my function to print hello world had a few issues; whether or not they were related to this odd thing, I don't really know. In the repeating part of the print function (the one in the code I was loading, not in the MBR code above) I forgot to add or al, al after I did lodsb and before I did jz .done, which might have been interfering with things. I am not completely sure, but after I updated that code and ran a few more debug sessions, it seems this issue doesn't occur anymore...


回答1:


There are a number of problems with your code, but it may well be that the issue is in the volume boot record you didn't show. Some of the problems in the MBR that should be resolved:

  • Set SP right after SS to ensure that an interrupt can't occur between the setting of SS and SP, which would corrupt memory at the address formed by the unknown old SP and the new SS (or vice versa). The CPU automatically turns off interrupts after setting SS and re-enables them after the following instruction.
  • Issue a CLD instruction to clear the direction flag (DF) so that string instructions like MOVSB and LODSB use forward movement.
  • When using Int 13h/ah=42h there are some BIOSes that need ES:BX set to the same values as in the Disk Address Packet (DAP). Your code improperly sets ES. It should have been set to zero.
  • When filling in the start LBA in the DAP from the value in the active partition entry your code only copies the lower 16 bits of a 32-bit value. This limits you to media that is <= 32MiB (512*65536). You should copy both the lower and higher half of the starting LBA from the partition table to the DAP.
  • When doing a disk read or write you should retry the operation 3 more times before failing. This may be required on real hardware using actual floppy and hard drives.
  • You should check for an active partition by checking that the value is 0x80 and not just the top bit. The only valid values are either 0x00 or 0x80.
  • The volume boot record (VBR) you are loading from the partition is usually one sector. If it is one sector, read one sector and not the whole partition.
  • Your code is overly complex in the way you set up the sections. If you are going to relocate the bootloader to 0x0600 then use an ORG of 0x0600. Just make sure that the code that relocates the boot sector from 0x0000:0x7c00 to 0x0000:0x0600 doesn't rely on any labels that are relative to 0x7c00.

There are some things in your code that are nice to have:

  • Clean up how you loop through the partition table searching for the active/bootable partition.
  • If you ever wanted to remain compatible with some ancient OSes then pass the address of the bootable partition entry in DS:SI before jumping to the volume boot record you read into memory. This isn't required.
  • Not a requirement and this is just meant as a note: if you ever wanted to retain compatibility with MS-DOS, partitions on a hard drive should always be on a cylinder boundary and preferably end on a cylinder boundary.

Some of these tips can be found in my Stackoverflow General Bootloader Tips.


A modified version of your relocatable bootloader that chain loads a Volume Boot Record (VBR) could be coded as:

boot.asm:

; boot.asm - relocating Master Boot Record that chain loads the Volume Boot
;            Record (VBR) of the active partition.
; Syntax: NASM.  Build: nasm -f bin boot.asm -o boot.bin

bits 16                        ; Real-mode code; stated explicitly instead of
                               ; relying on the bin format's default

DISK_RETRY     EQU 3           ; Extra read attempts made after the first failure
BOOT_ORG_RELOC EQU 0x0600      ; Address the MBR copies itself to and runs from
BOOT_ORG       EQU 0x7c00      ; Address the MBR starts at and the VBR is read to
MBR_SIZE       EQU 512         ; Size of the boot sector in bytes

; SECTION(n,l): open section n at byte offset l within the 512-byte sector.
; BOOT_ORG_RELOC is added because start= is expressed in ORG-relative addresses.
%define SECTION(n,l) section n start=l+BOOT_ORG_RELOC align=1

ORG BOOT_ORG_RELOC

; _start - MBR entry point.
; In:  DL = boot drive number passed by the BIOS. DL is never modified here, so
;      it is still valid for int 0x13 and for the VBR that is jumped to.
; Relocates the sector from BOOT_ORG to BOOT_ORG_RELOC, searches the partition
; table for an entry whose first byte is 0x80, reads that partition's first
; sector to 0x0000:BOOT_ORG and jumps to it with DS:SI = address of the entry.
; Prints an error and halts if no entry is active or every read attempt fails.
_start:
    ; This code runs before relocation so it can't rely on any labels relative
    ; to BOOT_ORG_RELOC
    xor ax, ax
    mov es, ax
    mov ds, ax
    mov ss, ax
    mov sp, BOOT_ORG           ; Place stack at 0x0000:0x7c00 below bootloader.
                               ; Done right after the SS load so SS:SP is never
                               ; seen half-updated.

    cld                        ; DF=0 for forward direction of string instructions
    mov cx, MBR_SIZE/2         ; MBR size to copy, in 16-bit words (MOVSW)
    mov si, BOOT_ORG           ; Source address = DS:SI (0x0000:0x7c00)
    mov di, BOOT_ORG_RELOC     ; Destination address = ES:DI (0x0000:0x0600)
    rep movsw

    jmp 0x0000:.reloc_start    ; Set CS:IP to continue at the next instruction but in
                               ; the relocated boot sector
.reloc_start:
    ; Start at end of partition table and search to beginning looking for active
    ; boot partition.
    mov si, partition_start    ; SI = base of partition table
    mov bx, PARTITION_SIZE     ; Set the offset to search at to end of partition table
.active_search_loop:
    sub bx, 16                 ; Go to previous partition entry
    jl .no_active              ; If BX is neg we have passed beginning of partition table
    cmp byte [si + bx], 0x80   ; Is partition bootable?
    jnz .active_search_loop    ;     If not bootable go back and search again

.fnd_active:
    lea di, [si + bx]          ; Save offset of active partition to DI
    mov ax, [si + bx + 8]      ; Copy partition start LBA to DAP structure (lower 16-bits)
    mov [dap_startlba], ax
    mov ax, [si + bx + 10]     ; Copy partition start LBA to DAP structure (upper 16-bits)
    mov [dap_startlba + 2], ax

    mov cx, DISK_RETRY         ; CX = retries left after a failed attempt
                               ; DL contains boot drive passed by BIOS
                               ; ES was previously set to 0
    mov bx, BOOT_ORG           ; ES:BX needs to be same values as the DAP for some BIOSes
    mov si, dap                ; DS:SI = beginning of DAP structure

.disk_retry:
    mov ah, 0x42               ; BIOS call for extended disk read
    int 0x13                   ; Read boot sector to 0x0000:0x7c00
    jnc .vbr_loaded            ; If int 0x13 succeeded (CF=0), run the loaded VBR

    dec cx                     ; Lower retry count by 1
    jl .disk_error             ; Retry count went below 0: give up
    xor ah, ah                 ; AH=0x00: reset the disk system for drive DL so
    int 0x13                   ; the next attempt starts from a known state
    jmp .disk_retry            ; AH is reloaded at the top of the loop

.disk_error:
    mov si, msg_er_read        ; Print disk error and halt
    call print
    jmp halt

.no_active:
    mov si, msg_no_part        ; Print no active partition error and halt
    call print
    jmp halt

.vbr_loaded:
                               ; DL is still same value originally passed by BIOS
    mov si, di                 ; DS:SI=address of active partition for some old OSes
    jmp 0x0000:BOOT_ORG        ; Execute the chain loaded VBR

; halt - end of the bootloader: interrupts off, then HLT forever. Never returns.
halt:
    cli                        ; Interrupts stay disabled from here on
.idle:
    hlt
    jmp short .idle            ; If HLT ever falls through, halt again

; print - write a zero-terminated string using int 0x10, AH=0x0e.
; In:    DS:SI = first byte of the string (read with LODSB)
; Out:   SI points just past the terminating zero byte
; Clobb: AX, BH, flags
print:
    xor bh, bh
    mov ah, 0x0e
    jmp short .fetch

.emit:
    int 0x10                   ; Output the character in AL

.fetch:
    lodsb                      ; AL = next byte of the string
    test al, al
    jnz .emit                  ; Non-zero byte: print it and fetch the next one
    ret                        ; Zero byte: end of string

; Disk Address Packet handed to int 0x13, AH=0x42 through DS:SI.
dap:
    db 0x10                    ; DAP size (16 bytes)
    db 0x00                    ; Second byte always 0
    dap_sectors:  dw 1         ; One sector: just the VBR
    dap_offset:   dw BOOT_ORG  ; Destination offset  (0x7c00)
    dap_segment:  dw 0x0000    ; Destination segment (0x0000)
    dap_startlba: dd 0, 0      ; 64-bit start LBA, low dword filled in at runtime
dap_end:

; Zero-terminated messages for the print routine
msg_er_read: db 'Disk Read Error....', 0
msg_no_part: db 'No Active Partition....', 0

; Partition table: four 16-byte entries starting at byte 446 of the sector.
; Each entry's bytes come from its own include file.
SECTION(parttbl, 446)
partition_start:

partition_1:
%include "part_n1.asm"

partition_2:
%include "part_n2.asm"

partition_3:
%include "part_n3.asm"

partition_4:
%include "part_n4.asm"

partition_end:
PARTITION_SIZE EQU partition_end - partition_start     ; Table size in bytes

; Boot signature in the last two bytes of the sector (bytes 510 and 511).
SECTION(bootsig, 510)
db 0x55, 0xaa

part_n1.asm:

; First partition entry (16 bytes).
;   byte 0       = 0x80, the value the MBR's active-partition test compares against
;   bytes 8..11  = 4 (little-endian), copied by the MBR into the DAP as the start LBA
;   bytes 12..15 = 4 (little-endian)
db 0x80, 0x01, 0x00, 0x05, 0x17, 0x01, 0x03, 0x01, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00

part_n2.asm:

dq 0, 0

part_n3.asm:

dq 0, 0

part_n4.asm:

dq 0, 0

A simple Volume Boot Record (VBR) to test could be:

; vbr.asm - minimal Volume Boot Record used to test the MBR's chain loading.
; Syntax: NASM.  Build: nasm -f bin vbr.asm -o vbr.bin

bits 16                        ; Real-mode code; stated explicitly instead of
                               ; relying on the bin format's default

BOOT_ORG       EQU 0x7c00      ; Address the MBR reads this sector to and jumps to

; SECTION(n,l): open section n at byte offset l within the 512-byte sector.
%define SECTION(n,l) section n start=l+BOOT_ORG align=1

ORG BOOT_ORG

; vbr_start - VBR entry point: zero the segment registers, put the stack just
; below the sector, print vbr_run_msg, then fall through into halt.
vbr_start:
    cld                        ; Forward direction for the LODSB in print
    xor ax, ax
    mov ds, ax                 ; DS = ES = SS = 0
    mov es, ax
    mov ss, ax
    mov sp, BOOT_ORG           ; Stack grows down from 0x0000:0x7c00

    mov si, vbr_run_msg        ; DS:SI = message announcing that the VBR runs
    call print
                               ; Execution continues into halt below

; halt - interrupts off, then HLT forever. Never returns.
halt:
    cli                        ; Interrupts stay disabled from here on
.idle:
    hlt
    jmp short .idle            ; If HLT ever falls through, halt again

; print - write a zero-terminated string using int 0x10, AH=0x0e.
; In:    DS:SI = first byte of the string (read with LODSB)
; Out:   SI points just past the terminating zero byte
; Clobb: AX, BH, flags
print:
    xor bh, bh
    mov ah, 0x0e
    jmp short .fetch

.emit:
    int 0x10                   ; Output the character in AL

.fetch:
    lodsb                      ; AL = next byte of the string
    test al, al
    jnz .emit                  ; Non-zero byte: print it and fetch the next one
    ret                        ; Zero byte: end of string

; Message printed by vbr_start: text, CR, LF, terminating zero
vbr_run_msg:
    db "VBR running", 0x0d, 0x0a, 0

; Boot signature in the last two bytes of the sector (bytes 510 and 511).
SECTION(bootsig, 510)
db 0x55, 0xaa

You can build and run this code as a 10 megabyte disk image with these commands:

# Assemble the MBR and the test VBR as flat binaries
nasm -f bin -o boot.bin boot.asm
nasm -f bin -o vbr.bin vbr.asm

# Create a zero-filled 10 MiB disk image (10 blocks of 1 MiB)
dd if=/dev/zero of=disk.img bs=1M count=10

# Write the boot sector to LBA 0; conv=notrunc keeps the rest of the image
dd if=boot.bin of=disk.img bs=512 seek=0 conv=notrunc

# Write the VBR to LBA 4; conv=notrunc keeps the rest of the image
dd if=vbr.bin of=disk.img bs=512 seek=4 conv=notrunc

In QEMU you can run it with the command:

qemu-system-i386 -hda disk.img

If it works, the VBR's message `VBR running` should appear in the QEMU window.



来源:https://stackoverflow.com/questions/59004215/weird-errors-while-reading-disk

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!