This file appears to be in a binary XML format. What is this format and how can it be parsed programmatically (as opposed to using the aapt dump tool in the SDK)?
In case it's useful, here's a C++ version of the Java snippet posted by Ribo:
struct decompressXML
{
// decompressXML -- Parse the 'compressed' binary form of Android XML docs
// such as for AndroidManifest.xml in .apk files
enum
{
endDocTag = 0x00100101,
startTag = 0x00100102,
endTag = 0x00100103
};
decompressXML(const BYTE* xml, int cb) {
// Compressed XML file/bytes starts with 24x bytes of data,
// 9 32 bit words in little endian order (LSB first):
// 0th word is 03 00 08 00
// 3rd word SEEMS TO BE: Offset at then of StringTable
// 4th word is: Number of strings in string table
// WARNING: Sometime I indiscriminently display or refer to word in
// little endian storage format, or in integer format (ie MSB first).
int numbStrings = LEW(xml, cb, 4*4);
// StringIndexTable starts at offset 24x, an array of 32 bit LE offsets
// of the length/string data in the StringTable.
int sitOff = 0x24; // Offset of start of StringIndexTable
// StringTable, each string is represented with a 16 bit little endian
// character count, followed by that number of 16 bit (LE) (Unicode) chars.
int stOff = sitOff + numbStrings*4; // StringTable follows StrIndexTable
// XMLTags, The XML tag tree starts after some unknown content after the
// StringTable. There is some unknown data after the StringTable, scan
// forward from this point to the flag for the start of an XML start tag.
int xmlTagOff = LEW(xml, cb, 3*4); // Start from the offset in the 3rd word.
// Scan forward until we find the bytes: 0x02011000(x00100102 in normal int)
for (int ii=xmlTagOff; ii");
indent++;
} else if (tag0 == endTag) { // XML END TAG
indent--;
off += 6*4; // Skip over 6 words of endTag data
std::string name = compXmlString(xml, cb, sitOff, stOff, nameSi);
prtIndent(indent, ""+name+"> (line "+toIntString(startTagLineNo)+"-"+toIntString(lineNo)+")");
//tr.parent(); // Step back up the NobTree
} else if (tag0 == endDocTag) { // END OF XML DOC TAG
break;
} else {
prt(" Unrecognized tag code '"+toHexString(tag0)
+"' at offset "+toIntString(off));
break;
}
} // end of while loop scanning tags and attributes of XML tree
prt(" end at offset "+off);
} // end of decompressXML
std::string compXmlString(const BYTE* xml, int cb, int sitOff, int stOff, int strInd) {
if (strInd < 0) return std::string("");
int strOff = stOff + LEW(xml, cb, sitOff+strInd*4);
return compXmlStringAt(xml, cb, strOff);
}
void prt(std::string str)
{
printf("%s", str.c_str());
}
void prtIndent(int indent, std::string str) {
char spaces[46];
memset(spaces, ' ', sizeof(spaces));
spaces[min(indent*2, sizeof(spaces) - 1)] = 0;
prt(spaces);
prt(str);
prt("\n");
}
// compXmlStringAt -- Return the string stored in StringTable format at
// offset strOff. This offset points to the 16 bit string length, which
// is followed by that number of 16 bit (Unicode) chars.
std::string compXmlStringAt(const BYTE* arr, int cb, int strOff) {
if (cb < strOff + 2) return std::string("");
int strLen = arr[strOff+1]<<8&0xff00 | arr[strOff]&0xff;
char* chars = new char[strLen + 1];
chars[strLen] = 0;
for (int ii=0; ii off + 3) ? ( arr[off+3]<<24&0xff000000 | arr[off+2]<<16&0xff0000
| arr[off+1]<<8&0xff00 | arr[off]&0xFF ) : 0;
} // end of LEW
std::string toHexString(DWORD attrResId)
{
char ch[20];
sprintf_s(ch, 20, "%lx", attrResId);
return std::string(ch);
}
std::string toIntString(int i)
{
char ch[20];
sprintf_s(ch, 20, "%ld", i);
return std::string(ch);
}
};