I\'m trying to inspect a buffer which contains a binary formatted message, but also contains string data. As an example, I\'m using this C code:
int main (vo
(gdb) define xxd
>dump binary memory dump.bin $arg0 $arg0+$arg1
>shell xxd dump.bin
>end
(gdb) xxd &j 10
0000000: 0000 0000 0000 0000 0000 0000 4d8c a7f7 ............M...
0000010: ff7f 0000 0000 0000 0000 0000 c8d7 ffff ................
0000020: ff7f 0000 0000 0000
Seems easy enough ;-)
You could likely write a Python script (modern GDB versions have embedded Python interpreter) to do the same, and get rid of the need to "shell out".
Unfortunately, @FatalError's and @gunthor's versions didn't work for me, so I wrote yet another one myself. This is how it looks like:
(gdb) xxd hello_string 0xc
00000001_00000f87: 48 656c 6c6f 0957 6f72 Hello.Wor
00000001_00000f90: 6c64 0a ld.
Newer versions of xxd
supports the -o
flag that allows specifying an offset to add to the displayed one (which will always start at 0000000
).
In case xxd -o
isn't available, here is a substitute that correctly aligns and shows address of the location that is xxd
'd.
The xxd
command:
define xxd
dump binary memory /tmp/dump.bin $arg0 $arg0+$arg1
eval "shell xxd-o %p /tmp/dump.bin", $arg0
end
The arguably ugly perl script xxd-o (xxd
with offset):
#!/usr/bin/env perl
use IPC::Open2;
$SIG{'__WARN__'} = sub{ die "$0: $!\n" };
my $offset = shift // "0";
$offset = oct($offset) if $offset =~ /^0/;
my $base = $offset >= 2**32 ? 16 : 8;
my $zeroes = $offset % 16;
my $padding = 1 + int($zeroes / 2) + 2*$zeroes;
my $bytestr = "\0" x $zeroes;
{ local $/; $bytestr .= <> }
open2(\*XXD_OUT, \*XXD_IN, "xxd") or die "xxd is not available!";
print XXD_IN $bytestr; close XXD_IN;
if ($zeroes) {
$_ = <XXD_OUT>;
s/^(.{50}).{$zeroes}/$1 . (' ' x $zeroes)/ge;
s/^([[:xdigit:]]+:).{$padding}/$1 . (' ' x $padding)/ge;
my $newoff = sprintf("%0${base}x",hex($1)+$offset) =~ s/^(.{8})(.{8})$/$1_$2/r;
s/^([[:xdigit:]]+):/$newoff:/g;
print
}
while (<XXD_OUT>) {
s/^([[:xdigit:]]+)(?=:)/sprintf("%0${base}x", hex($1)+$offset-$offset%16) =~ s[^(.{8})(.{8})$][$1_$2]r/ge;
print
}
Improvements welcome! :-)
So, I ended up playing around with the python interface and came up with this:
import gdb
from curses.ascii import isgraph
def groups_of(iterable, size, first=0):
first = first if first != 0 else size
chunk, iterable = iterable[:first], iterable[first:]
while chunk:
yield chunk
chunk, iterable = iterable[:size], iterable[size:]
class HexDump(gdb.Command):
def __init__(self):
super (HexDump, self).__init__ ('hex-dump', gdb.COMMAND_DATA)
def invoke(self, arg, from_tty):
argv = gdb.string_to_argv(arg)
if len(argv) != 2:
raise gdb.GdbError('hex-dump takes exactly 2 arguments.')
addr = gdb.parse_and_eval(argv[0]).cast(
gdb.lookup_type('void').pointer())
try:
bytes = int(gdb.parse_and_eval(argv[1]))
except ValueError:
raise gdb.GdbError('Byte count numst be an integer value.')
inferior = gdb.selected_inferior()
align = gdb.parameter('hex-dump-align')
width = gdb.parameter('hex-dump-width')
if width == 0:
width = 16
mem = inferior.read_memory(addr, bytes)
pr_addr = int(str(addr), 16)
pr_offset = width
if align:
pr_offset = width - (pr_addr % width)
pr_addr -= pr_addr % width
for group in groups_of(mem, width, pr_offset):
print '0x%x: ' % (pr_addr,) + ' '*(width - pr_offset),
print ' '.join(['%02X' % (ord(g),) for g in group]) + \
' ' * (width - len(group) if pr_offset == width else 0) + ' ',
print ' '*(width - pr_offset) + ''.join(
[g if isgraph(g) or g == ' ' else '.' for g in group])
pr_addr += width
pr_offset = width
class HexDumpAlign(gdb.Parameter):
def __init__(self):
super (HexDumpAlign, self).__init__('hex-dump-align',
gdb.COMMAND_DATA,
gdb.PARAM_BOOLEAN)
set_doc = 'Determines if hex-dump always starts at an "aligned" address (see hex-dump-width'
show_doc = 'Hex dump alignment is currently'
class HexDumpWidth(gdb.Parameter):
def __init__(self):
super (HexDumpWidth, self).__init__('hex-dump-width',
gdb.COMMAND_DATA,
gdb.PARAM_INTEGER)
set_doc = 'Set the number of bytes per line of hex-dump'
show_doc = 'The number of bytes per line in hex-dump is'
HexDump()
HexDumpAlign()
HexDumpWidth()
I realize it might not be the most beautiful and elegant solution, but it gets the job done and works as a first draft. It could be included in ~/.gdbinit
like:
python
sys.path.insert(0, '/path/to/module/dir')
import hexdump
end
Then could be used with the program above like so:
(gdb) hex-dump buf 100
0x7fffffffdf00: 01 02 03 04 53 74 72 69 6E 67 20 44 61 74 61 AA ....String Data.
0x7fffffffdf10: BB CC 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0x7fffffffdf20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0x7fffffffdf30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0x7fffffffdf40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0x7fffffffdf50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0x7fffffffdf60: 00 00 00 00 ....
And a few other touches for good measure:
(gdb) set hex-dump-align on
Determines if hex-dump always starts at an "aligned" address (see hex-dump-width
(gdb) hex-dump &buf[5] 95
0x7fffffffdf00: 74 72 69 6E 67 20 44 61 74 61 AA tring Data.
0x7fffffffdf10: BB CC 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0x7fffffffdf20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0x7fffffffdf30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0x7fffffffdf40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0x7fffffffdf50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0x7fffffffdf60: 00 00 00 00 ....
(gdb) set hex-dump-width 8
Set the number of bytes per line of hex-dump
(gdb) hex-dump &buf[5] 95
0x7fffffffdf00: 74 72 69 tri
0x7fffffffdf08: 6E 67 20 44 61 74 61 AA ng Data.
0x7fffffffdf10: BB CC 00 00 00 00 00 00 ........
0x7fffffffdf18: 00 00 00 00 00 00 00 00 ........
0x7fffffffdf20: 00 00 00 00 00 00 00 00 ........
0x7fffffffdf28: 00 00 00 00 00 00 00 00 ........
0x7fffffffdf30: 00 00 00 00 00 00 00 00 ........
0x7fffffffdf38: 00 00 00 00 00 00 00 00 ........
0x7fffffffdf40: 00 00 00 00 00 00 00 00 ........
0x7fffffffdf48: 00 00 00 00 00 00 00 00 ........
0x7fffffffdf50: 00 00 00 00 00 00 00 00 ........
0x7fffffffdf58: 00 00 00 00 00 00 00 00 ........
0x7fffffffdf60: 00 00 00 00 ....
No promises that there aren't bugs :). I might stick it up in github or something if people are interested.
I've only tested it with GDB 7.4.
My own contribution, from Employed Russian solution and Roger Lipscombe comments:
The script (tested with gdb 7.8.1):
define xxd
if $argc < 2
set $size = sizeof(*$arg0)
else
set $size = $arg1
end
dump binary memory dump.bin $arg0 ((void *)$arg0)+$size
eval "shell xxd -o %d dump.bin; rm dump.bin", ((void *)$arg0)
end
document xxd
Dump memory with xxd command (keep the address as offset)
xxd addr [size]
addr -- expression resolvable as an address
size -- size (in byte) of memory to dump
sizeof(*addr) is used by default
end
Examples:
(gdb) p &m_data
$1 = (data_t *) 0x200130dc <m_data>
(gdb) p sizeof(m_data)
$2 = 32
(gdb) xxd &m_data 32
200130dc: 0300 0000 e87c 0400 0000 0000 0100 0000 .....|..........
200130ec: 0c01 0000 b831 0020 0100 0000 0100 0000 .....1. ........
(gdb) xxd &m_data
200130dc: 0300 0000 e87c 0400 0000 0000 0100 0000 .....|..........
200130ec: 0c01 0000 b831 0020 0100 0000 0100 0000 .....1. ........
(gdb) help xxd
Dump memory with xxd command (keep the address as offset)
xxd addr [size]
addr -- expression resolvable as an address
size -- size (in byte) of memory to dump
sizeof(*addr) is used by default
an adapted version of the solution of User FatalError
examples
hd 0xbfffe4f1
hd 0xbfffe4f1 500
import gdb
from curses.ascii import isgraph
def groups_of(iterable, size, first=0):
first = first if first != 0 else size
chunk, iterable = iterable[:first], iterable[first:]
while chunk:
yield chunk
chunk, iterable = iterable[:size], iterable[size:]
class HexDump(gdb.Command):
def __init__(self):
super (HexDump, self).__init__ ('hd', gdb.COMMAND_DATA)
def invoke(self, arg, from_tty):
argv = gdb.string_to_argv(arg)
addr = gdb.parse_and_eval(argv[0]).cast(
gdb.lookup_type('void').pointer())
if len(argv) == 2:
try:
bytes = int(gdb.parse_and_eval(argv[1]))
except ValueError:
raise gdb.GdbError('Byte count numst be an integer value.')
else:
bytes = 500
inferior = gdb.selected_inferior()
align = gdb.parameter('hex-dump-align')
width = gdb.parameter('hex-dump-width')
if width == 0:
width = 16
mem = inferior.read_memory(addr, bytes)
pr_addr = int(str(addr), 16)
pr_offset = width
if align:
pr_offset = width - (pr_addr % width)
pr_addr -= pr_addr % width
start=(pr_addr) & 0xff;
print (' ' , end="")
print (' '.join(['%01X' % (i&0x0f,) for i in range(start,start+width)]) , end="")
print (' ' , end="")
print (' '.join(['%01X' % (i&0x0f,) for i in range(start,start+width)]) )
for group in groups_of(mem, width, pr_offset):
print ('0x%x: ' % (pr_addr,) + ' '*(width - pr_offset), end="")
print (' '.join(['%02X' % (ord(g),) for g in group]) + \
' ' * (width - len(group) if pr_offset == width else 0) + ' ', end="")
print (' '*(width - pr_offset) + ' '.join(
[chr( int.from_bytes(g, byteorder='big')) if isgraph( int.from_bytes(g, byteorder='big') ) or g == ' ' else '.' for g in group]))
pr_addr += width
pr_offset = width
class HexDumpAlign(gdb.Parameter):
def __init__(self):
super (HexDumpAlign, self).__init__('hex-dump-align',
gdb.COMMAND_DATA,
gdb.PARAM_BOOLEAN)
set_doc = 'Determines if hex-dump always starts at an "aligned" address (see hex-dump-width'
show_doc = 'Hex dump alignment is currently'
class HexDumpWidth(gdb.Parameter):
def __init__(self):
super (HexDumpWidth, self).__init__('hex-dump-width',
gdb.COMMAND_DATA,
gdb.PARAM_INTEGER)
set_doc = 'Set the number of bytes per line of hex-dump'
show_doc = 'The number of bytes per line in hex-dump is'
HexDump()
HexDumpAlign()
HexDumpWidth()