问题
So, I've been working on a hobby project: creating my own operating system. I started a while back but dropped it until a couple of nights ago. I just fixed an oversight that caused nothing to be read from the sectors I wanted to read. With that error out of the way, a new one has come about, and I honestly don't even know where to begin debugging it.
I am coding a Master Boot Record and debugging it with GDB and QEMU, here is the code to my master boot record (It was assembled using YASM)
Sorry if my code is not very good. I am not an expert at assembly language...
; yasm boot.asm -fbin
; 16-bit real-mode master boot record. The entry section below copies 512 bytes
; from _start to _strap and far-jumps into the copy.
bits 16
; part(n,l):  open section n whose labels are computed from virtual address l.
%define part(n,l) section n vstart=l align=1
; rpart(n,l): open section n placed at start offset l.
%define rpart(n,l) section n start=l align=1
; ----------------------- ;
part(entry, 0x7c00) ;
; --ENTRY---------------- ;
_start:
; Save DL in boot_drive at its 0x7c00-based address.
; NOTE(review): DS is only zeroed two instructions later, so this store uses
; whatever DS value was in effect on entry.
mov [boot_drive+0x7c00], dl
xor ax, ax
; NOTE(review): SP is loaded four instructions after SS; DS and ES are written
; in between.
mov ss, ax
mov ds, ax
mov es, ax
mov sp, _start
mov bp, _start
; Copy 512 bytes from DS:SI = _start to ES:DI = _strap.
; NOTE(review): no CLD is issued in the visible code, so the copy direction
; depends on the direction flag as it was on entry.
mov cx, 512
mov si, _start
mov di, _strap
rep movsb
; Far jump with CS=0 to _strap plus the offset of b_boot_strapper within this
; section ($$ = start of the current section).
jmp 0:_strap+(b_boot_strapper-$$)
b_boot_strapper:
; ----------------------- ;
part(strap, 0x0600) ;
; --BOOT STRAPPER-------- ;
; _strap: look for a partition entry with bit 7 set in its first byte, fill in
; the disk address packet from that entry, read with INT 13h AH=42h, then jump
; to 0:0x7c00. On failure a message is printed and execution goes to halt.
_strap:
xor cx, cx ; CL = loop counter
.find_active_part:
cmp cl, 4
jge .no_active_part ; four passes done without a match
; Intended: BX = counter * 16, used as a byte offset from partition_1.
; NOTE(review): the counter is placed in AH while AL stays 0, and MUL BL
; multiplies AL by BL, so AX (and therefore BX) is 0 on every pass.
xor ax, ax
mov ah, cl
mov bl, 16
mul bl
mov bx, ax
inc cl
mov al, (1 << 7) ; mask for bit 7
mov ah, [partition_1+0x600+bx] ; first byte of the entry at offset BX
and ah, al
jnz .load_active_part ; bit 7 set
jmp .find_active_part
.load_active_part:
xor ax, ax
mov ds, ax
mov ah, 42h ; INT 13h function number
mov dl, [boot_drive+0x600] ; drive number saved by _start, read from the copy
mov si, dap+0x600 ; DS:SI = disk address packet in the copy
; NOTE(review): ES receives the address of the DAP used as a segment value
; (not zero); BX is preserved around this by the push/pop.
push bx
mov bx, dap+0x600
mov es, bx
pop bx
; Copy one 16-bit word from entry offset 8 into dap_startlba and one from
; entry offset 12 into dap_sectors; only the low 16 bits of each are used.
mov cx, [partition_1+0x600+bx+8]
mov [dap_startlba+0x600], cx
mov cx, [partition_1+0x600+bx+12]
mov [dap_sectors+0x600], cx
int 13h
jc .disk_error ; CF set = read failed
; Read succeeded: zero DS/ES/SS, reset SP/BP to _start, reload DL with the
; saved drive number and far-jump to 0:0x7c00.
xor ax, ax
mov ds, ax
mov es, ax
mov ss, ax
mov sp, _start
mov bp, _start
mov dl, [boot_drive+0x600]
jmp 0:0x7c00
.no_active_part:
; NOTE(review): msg_no_part / msg_er_read are resolved against this section's
; vstart of 0x0600, while the copied code appears to sit at 0x0600 plus the
; size of the entry section - confirm the address SI actually receives.
mov si, msg_no_part
call print
jmp halt
.disk_error:
mov si, msg_er_read
call print
jmp halt
; print: output the zero-terminated string at DS:SI one character at a time
; through INT 10h with AH=0Eh, BH=0, BL=0Fh.
; Modifies AX, BX, DX and SI.
print:
mov dx, ax ; copy AX into DX; the visible code never copies it back
mov ah, 0Eh
xor bh, bh
mov bl, 0Fh
.rep:
lodsb ; AL = byte at DS:SI, SI advances according to DF
or al, al ; sets ZF when AL is the terminating zero
jz .done
int 10h
jmp .rep
.done:
ret
; halt: disable interrupts and stop; if execution resumes after HLT, jump back
; to halt and stop again.
halt:
cli
hlt
jmp halt
; Zero-terminated messages passed to print in SI.
msg_er_read db 'Disk Read Error....', 0
msg_no_part db 'No Active Partition....', 0
; ----------------------- ;
rpart(variables, 300) ;
; --VARIABLES------------ ;
; Section placed at start offset 300 (decimal).
boot_drive db 0 ; receives DL in _start
dap: ; Disk Address Packet
db 16, 0 ; first byte 16 = length of this packet in bytes, second byte 0
dap_sectors dw 0 ; filled in at run time from entry offset 12
dap_offset dw 0x7c00 ; offset part of the destination address
dap_segment dw 0 ; segment part of the destination address
dap_startlba dq 0 ; low word filled in at run time from entry offset 8
dap_end:
; ----------------------- ;
rpart(partitions, 446) ;
; --VARIABLES------------ ;
; Four partition entries, 16 bytes each, starting at offset 446.
partition_1: ; This file has the following 16 bytes:
; 0x80, 0x01, 0x00, 0x05, 0x17, 0x01, 0x03, 0x01, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00
%include "part_n1.asm"
partition_2: ; The rest of these files are just 16 null bytes.
%include "part_n2.asm"
partition_3:
%include "part_n3.asm"
partition_4:
%include "part_n4.asm"
; ------------------------------- ;
rpart(signature, 510) ;
; Two signature bytes at offsets 510 and 511, the last two bytes of the sector.
db 0x55, 0xAA ;
; ------------------------------- ;
This code works! However, I don't know whether this is an issue with QEMU or not, but the data it reads from the sectors shows a bit of corruption or data loss...
These are the bytes that were expected to be at 0x7c00
EB 1B B4 0E 30 FF B3 0F
AC 74 04 CD 10 EB F9 C3
48 65 6C 6C 6F 20 57 6F
72 6C 64 21 00 BE 10 7C
E8 DF FF F4
(It's a basic function that prints "Hello World!")
This is what ended up actually being in memory at that location:
EB 1B B4 0E 30 FF B3 0F
AC 74 04 CD 10 EB F9 C3
48 65 6C 6C 6F 20 57 6F
72 6C 64 21 00 BE 10 7C
F0 DF FF F4
If you look closely the 4th byte from the last was changed from E8 to F0, I have no idea why this happened. And in the last run the "E" in "Hello World" was also changed but it wasn't in this debug run.
I need help with even where to begin debugging this...
Edit 1
I realized that my function to print hello world had a few issues; whether or not they were related to this odd behavior, I don't really know. In the repeating part of the print function (the one in the code I was loading, not in the MBR code above) I forgot to add `or al, al` after the `lodsb` and before the `jz .done`, which might have been interfering with things. I am not completely sure, but after I updated that code and ran a few more debug sessions, it seems this issue doesn't occur anymore...
回答1:
There are a number of problems with your code, but it may well be that the issue is in the volume boot record you didn't show. Some of the problems in the MBR that should be resolved:
- Set SP right after SS to ensure that an interrupt can't occur between the setting of SS and SP, which would corrupt memory at the address formed by the unknown old SP and the new SS (or vice versa). The CPU automatically turns off interrupts after setting SS and re-enables them after the following instruction.
- Issue a CLD instruction to clear the direction flag (DF) so that string instructions like MOVSB and LODSB use forward movement.
- When using Int 13h/ah=42h there are some BIOSes that need ES:BX set to the same values as in the Disk Address Packet (DAP). Your code improperly sets ES. It should have been set to zero.
- When filling in the start LBA in the DAP from the value in the active partition entry your code only copies the lower 16 bits of a 32-bit value. This limits you to media that is <= 32MiB (512*65536). You should copy both the lower and higher half of the starting LBA from the partition table to the DAP.
- When doing a disk read or write you should retry the operation 3 more times before failing. This may be required on real hardware using actual floppy and hard drives.
- You should check for an active partition by checking that the value is 0x80 and not just the top bit. The only valid values are either 0x00 or 0x80.
- The volume boot record (VBR) you are loading from the partition is usually one sector. If it is one sector, read one sector and not the whole partition.
- Your code is overly complex in the way you set up the sections. If you are going to relocate the bootloader to 0x0600 then use an ORG of 0x0600. Just make sure that the code that relocates the boot sector from 0x0000:0x7c00 to 0x0000:0x0600 doesn't rely on any labels that are relative to 0x7c00.
There are some things in your code that are nice to have:
- Clean up how you loop through the partition table searching for the active/bootable partition.
- If you ever wanted to remain compatible with some ancient OSes then pass the address of the bootable partition entry in DS:SI before jumping to the volume boot record you read into memory. This isn't required.
- Not a requirement and this is just meant as a note: if you ever wanted to retain compatibility with MS-DOS, partitions on a hard drive should always be on a cylinder boundary and preferably end on a cylinder boundary.
Some of these tips can be found in my Stackoverflow General Bootloader Tips.
A modified version of your relocatable bootloader that chain loads a Volume Boot Record (VBR) could be coded as:
boot.asm:
; Build-time constants for the relocating MBR.
DISK_RETRY EQU 3 ; initial value of the retry counter for the VBR read
BOOT_ORG_RELOC EQU 0x0600 ; address the MBR copies itself to and runs from
BOOT_ORG EQU 0x7c00 ; address the MBR is copied from and the VBR is read to
MBR_SIZE EQU 512 ; size of the boot sector in bytes
; SECTION(n,l): open section n at offset l from BOOT_ORG_RELOC (used below with
; l = 446 for the partition table and l = 510 for the boot signature).
%define SECTION(n,l) section n start=l+BOOT_ORG_RELOC align=1
; All labels are assembled relative to the relocated address.
ORG BOOT_ORG_RELOC
_start:
; MBR entry point: copy this sector from BOOT_ORG to BOOT_ORG_RELOC, find the
; bootable partition entry, read one sector (the VBR) from its start LBA to
; 0x0000:BOOT_ORG and jump to it.
; In:  DL = boot drive passed by the BIOS (never modified by this code)
; Out: does not return. On success jumps to 0x0000:BOOT_ORG with DL unchanged
;      and DS:SI = address of the bootable partition entry. On failure prints a
;      message and halts.
;
; This code occurs before relocation so can't rely on any labels relative to
; BOOT_ORG_RELOC
xor ax, ax
mov es, ax
mov ds, ax
mov ss, ax
mov sp, BOOT_ORG ; Place stack at 0x0000:0x7c00 below bootloader
cld ; DF=0 for forward direction of string instructions
mov cx, MBR_SIZE/2 ; MBR size to copy in words (MOVSW moves 2 bytes per step)
mov si, BOOT_ORG ; Source address = DS:SI (0x0000:0x7c00)
mov di, BOOT_ORG_RELOC ; Destination address = ES:DI (0x0000:0x0600)
rep movsw
jmp 0x0000:.reloc_start ; Set CS:IP to continue at the next instruction but in
; the relocated boot sector
.reloc_start:
; Start at end of partition table and search to beginning looking for active
; boot partition.
mov si, partition_start ; SI = base of partition table
mov bx, PARTITION_SIZE ; BX = offset just past the last partition entry
.active_search_loop:
sub bx, 16 ; Go to previous partition entry (entries are 16 bytes)
jl .no_active ; If BX is neg we have passed beginning of partition table
cmp byte [si + bx], 0x80 ; Is partition bootable?
jnz .active_search_loop ; If not bootable go back and search again
.fnd_active:
lea di, [si + bx] ; DI = address of the active partition entry
mov ax, [di + 8] ; Copy partition start LBA to DAP structure (lower 16-bits)
mov [dap_startlba], ax
mov ax, [di + 10] ; Copy partition start LBA to DAP structure (upper 16-bits)
mov [dap_startlba + 2], ax
mov cx, DISK_RETRY ; CX = retries remaining after the first attempt
; DL contains boot drive passed by BIOS
; ES was previously set to 0
mov bx, BOOT_ORG ; ES:BX needs to be same values as the DAP for some BIOSes
mov si, dap ; DS:SI = beginning of DAP structure
.disk_retry:
mov ah, 0x42 ; BIOS call for extended disk read
int 0x13 ; Read boot sector to 0x0000:0x7c00
jnc .vbr_loaded ; If int 0x13 succeeded (CF=0), run the loaded VBR
xor ah, ah ; AH=0x00: reset the disk system for drive DL before retrying
int 0x13 ; Result of the reset is ignored; the next read reports errors
dec cx ; Lower retry count by 1
jge .disk_retry ; If retry count >= 0 go back and try again
.disk_error:
mov si, msg_er_read ; Print disk error and halt
call print
jmp halt
.no_active:
mov si, msg_no_part ; Print no active partition error and halt
call print
jmp halt
.vbr_loaded:
; DL is still same value originally passed by BIOS
mov si, di ; DS:SI=address of active partition for some old OSes
jmp 0x0000:BOOT_ORG ; Execute the chain loaded VBR
; halt - stop the bootloader for good.
; Turns interrupts off, then executes HLT; should execution ever continue past
; the HLT, control goes straight back to it.
halt:
cli
.park:
hlt
jmp short .park
; print - write a zero-terminated string using INT 0x10, AH=0x0e, BH=0.
; In:    DS:SI = address of the string
; Out:   SI points just past the terminating zero byte
; Clobb: AX, BH, flags
; Note:  LODSB advances SI according to DF; callers here clear DF first.
print:
xor bh, bh
mov ah, 0x0e
.next_char:
lodsb ; AL = next character
test al, al ; terminator reached?
jz .end_of_string
int 0x10
jmp .next_char
.end_of_string:
ret
; Disk Address Packet handed to INT 0x13 AH=0x42 in DS:SI. Every field except
; dap_startlba is fixed at assembly time.
dap: ; Disk Address Packet
db 16, 0 ; DAP size, second byte always 0
dap_sectors dw 1 ; Read VBR (1 sector)
dap_offset dw BOOT_ORG ; Read to 0x0000:0x7c00
dap_segment dw 0 ; Segment part of the destination (0x0000)
dap_startlba dq 0 ; To be filled in at runtime
dap_end:
; Zero-terminated error messages passed to print in SI.
msg_er_read: db 'Disk Read Error....', 0
msg_no_part: db 'No Active Partition....', 0
; Partition table: four entries pulled in from separate include files, placed
; at offset 446 of the sector.
SECTION(parttbl, 446)
partition_start:
partition_1:
%include "part_n1.asm"
partition_2:
%include "part_n2.asm"
partition_3:
%include "part_n3.asm"
partition_4:
%include "part_n4.asm"
partition_end:
; Table size in bytes; the active-partition search starts from this offset and
; steps backwards 16 bytes at a time.
PARTITION_SIZE EQU partition_end - partition_start
; Boot signature word in the last two bytes of the sector (offsets 510-511);
; stored little-endian, i.e. bytes 0x55 then 0xaa.
SECTION(bootsig, 510)
dw 0xaa55
part_n1.asm:
; 16-byte partition entry.
;   byte 0      = 0x80: the value the loader tests for to treat the entry as bootable
;   bytes 1-7   = not read by the loader (CHS start / type / CHS end in the
;                 usual MBR entry layout - confirm if these matter to you)
;   bytes 8-11  = 4: start LBA, copied into the DAP by the loader
;   bytes 12-15 = 4: not read by the loader (sector count in the usual layout)
db 0x80, 0x01, 0x00, 0x05, 0x17, 0x01, 0x03, 0x01, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00
part_n2.asm:
; Empty partition entry: 16 zero bytes, so byte 0 is not 0x80 (not bootable).
dq 0, 0
part_n3.asm:
; Empty partition entry: 16 zero bytes, so byte 0 is not 0x80 (not bootable).
dq 0, 0
part_n4.asm:
; Empty partition entry: 16 zero bytes, so byte 0 is not 0x80 (not bootable).
dq 0, 0
A simple Volume Boot Record (VBR) to test could be:
; Build-time constants for the test VBR, which is assembled to run at 0x7c00.
BOOT_ORG EQU 0x7c00 ; address this sector is loaded to and executed from
; SECTION(n,l): open section n at offset l from BOOT_ORG (used below with
; l = 510 for the boot signature).
%define SECTION(n,l) section n start=l+BOOT_ORG align=1
ORG BOOT_ORG
; vbr_start - entry point of the test VBR.
; Zeroes DS/ES/SS, puts the stack top at BOOT_ORG, clears DF, prints
; vbr_run_msg and then falls through into halt. Never returns.
vbr_start:
cld ; string instructions move forward (DF=0)
xor ax, ax
mov ds, ax ; DS = 0
mov es, ax ; ES = 0
mov ss, ax ; SS = 0, followed immediately by the SP load
mov sp, BOOT_ORG ; stack grows down from 0x0000:0x7c00, below this sector
mov si, vbr_run_msg ; DS:SI = message announcing that the VBR is running
call print

; halt - interrupts off, then HLT forever.
halt:
cli
.park:
hlt
jmp short .park
; print - write a zero-terminated string using INT 0x10, AH=0x0e, BH=0.
; In:    DS:SI = address of the string
; Out:   SI points just past the terminating zero byte
; Clobb: AX, BH, flags
; Note:  LODSB advances SI according to DF; vbr_start clears DF before calling.
print:
xor bh, bh
mov ah, 0x0e
.next_char:
lodsb ; AL = next character
test al, al ; terminator reached?
jz .end_of_string
int 0x10
jmp .next_char
.end_of_string:
ret
; Message printed by vbr_start: text, then CR (0x0d), LF (0x0a) and the
; terminating zero that print stops on.
vbr_run_msg: db "VBR running", 0x0d, 0x0a, 0
; Boot signature word in the last two bytes of the sector (offsets 510-511);
; stored little-endian, i.e. bytes 0x55 then 0xaa.
SECTION(bootsig, 510)
dw 0xaa55
You can build and run this code as a 10 megabyte disk image with these commands:
# assemble the MBR and the test VBR as flat binaries
nasm -f bin -o boot.bin boot.asm
nasm -f bin -o vbr.bin vbr.asm
# create a zero-filled 10MiB disk image (10 blocks of 1MiB)
dd if=/dev/zero of=disk.img bs=1M count=10
# write the MBR into sector 0 (LBA=0), keeping the rest of the image intact
dd if=boot.bin of=disk.img bs=512 seek=0 conv=notrunc
# write the VBR into sector 4 (LBA=4), keeping the rest of the image intact
dd if=vbr.bin of=disk.img bs=512 seek=4 conv=notrunc
In QEMU you can run it with the command:
# Attach disk.img as the first hard disk and state the raw format explicitly,
# so QEMU does not have to probe the image format.
qemu-system-i386 -drive file=disk.img,format=raw,index=0,media=disk
If it works, QEMU should display the message printed by the test VBR: `VBR running`.
来源:https://stackoverflow.com/questions/59004215/weird-errors-while-reading-disk