How to parse the AndroidManifest.xml file inside an .apk package

前端 未结 16 994
暖寄归人
暖寄归人 2020-11-22 10:32

This file appears to be in a binary XML format. What is this format and how can it be parsed programmatically (as opposed to using the aapt dump tool in the SDK)?

16条回答
  •  孤街浪徒
    2020-11-22 11:05

    In case it's useful, here's a C++ version of the Java snippet posted by Ribo:

    struct decompressXML
    {
        // decompressXML -- Parse the 'compressed' binary form of Android XML docs 
        // such as for AndroidManifest.xml in .apk files
        // Tag codes found in the XML tag stream, written as ordinary integers
        // (in the file each one is stored as a 32 bit little endian word).
        enum
        {
            endDocTag = 0x00100101,  // marks the end of the XML document
            startTag =  0x00100102,  // marks an XML start tag
            endTag =    0x00100103   // marks an XML end tag
        };
    
        // decompressXML (constructor) -- Walk the binary XML held in xml and print
        // what is found via prt/prtIndent.
        //   xml: the raw bytes of the compressed XML document
        //   cb:  passed to LEW/compXmlString together with xml; presumably the
        //        number of valid bytes in xml -- TODO confirm against LEW
        // NOTE(review): the text of this function is truncated in the middle (the
        //   `for` line below breaks off), so the scan for the first start tag,
        //   the main loop header and the start-tag handling are not visible here.
        // NOTE(review): the last prt call adds the int `off` to a string literal;
        //   in C++ that is pointer arithmetic, not concatenation, so
        //   toIntString(off) was probably intended.
        decompressXML(const BYTE* xml, int cb) {
        // Compressed XML file/bytes starts with 0x24 (36) bytes of data,
        // nine 32 bit words in little endian order (LSB first):
        //   0th word is 03 00 08 00
        //   3rd word SEEMS TO BE:  Offset of the end of the StringTable
        //   4th word is: Number of strings in string table
        // WARNING: Sometimes I indiscriminately display or refer to a word in
        //   little endian storage format, or in integer format (ie MSB first).
        int numbStrings = LEW(xml, cb, 4*4);
    
        // StringIndexTable starts at offset 0x24, an array of 32 bit LE offsets
        // of the length/string data in the StringTable.
        int sitOff = 0x24;  // Offset of start of StringIndexTable
    
        // StringTable, each string is represented with a 16 bit little endian
        // character count, followed by that number of 16 bit (LE) (Unicode) chars.
        int stOff = sitOff + numbStrings*4;  // StringTable follows StrIndexTable
    
        // XMLTags, The XML tag tree starts after some unknown content after the
        // StringTable.  There is some unknown data after the StringTable, scan
        // forward from this point to the flag for the start of an XML start tag.
        int xmlTagOff = LEW(xml, cb, 3*4);  // Start from the offset in the 3rd word.
        // Scan forward until we find the bytes: 0x02011000(x00100102 in normal int)
        for (int ii=xmlTagOff; ii");
            indent++;
    
          } else if (tag0 == endTag) { // XML END TAG
            indent--;
            off += 6*4;  // Skip over 6 words of endTag data
            std::string name = compXmlString(xml, cb, sitOff, stOff, nameSi);
            prtIndent(indent, "  (line "+toIntString(startTagLineNo)+"-"+toIntString(lineNo)+")");
            //tr.parent();  // Step back up the NobTree
    
          } else if (tag0 == endDocTag) {  // END OF XML DOC TAG
            break;
    
          } else {
            prt("  Unrecognized tag code '"+toHexString(tag0)
              +"' at offset "+toIntString(off));
            break;
          }
        } // end of while loop scanning tags and attributes of XML tree
        prt("    end at offset "+off);
        } // end of decompressXML
    
    
        // compXmlString -- Return string number strInd of the document.  The
        // StringIndexTable entry for strInd (a 32 bit LE word at sitOff+strInd*4)
        // is read with LEW and added to stOff to locate the string, which is then
        // decoded by compXmlStringAt.  A negative strInd yields an empty string.
        std::string compXmlString(const BYTE* xml, int cb, int sitOff, int stOff, int strInd) {
          if (strInd >= 0) {
            const int indexEntryOff = sitOff + strInd*4;
            const int relativeOff = LEW(xml, cb, indexEntryOff);
            return compXmlStringAt(xml, cb, stOff + relativeOff);
          }
          return std::string("");
        }
    
        // prt -- Write str to standard output as-is; no newline is appended.
        void prt(std::string str)
        {
            const char* text = str.c_str();
            printf("%s", text);
        }
        // prtIndent -- Print str on a line of its own, preceded by two spaces per
        // indent level.
        //   indent: nesting depth; the indentation is capped at 45 spaces, and a
        //           zero or negative depth (an end tag decrements the depth, so
        //           unbalanced input can drive it below zero) prints no
        //           indentation.
        //   str:    text to print after the indentation; a newline is appended.
        void prtIndent(int indent, std::string str) {
            const int maxWidth = 45;  // same cap the former 46-byte buffer imposed
            int width = 0;
            if (indent > 0) {
                // Test against maxWidth/2 before doubling so indent*2 cannot overflow.
                width = (indent > maxWidth / 2) ? maxWidth : indent * 2;
            }
            prt(std::string(width, ' '));
            prt(str);
            prt("\n");
        }
    
    
        // compXmlStringAt -- Return the string stored in StringTable format at
        // offset strOff.  This offset points to the 16 bit string length, which 
        // is followed by that number of 16 bit (Unicode) chars.
        std::string compXmlStringAt(const BYTE* arr, int cb, int strOff) {
            if (cb < strOff + 2) return std::string("");
          int strLen = arr[strOff+1]<<8&0xff00 | arr[strOff]&0xff;
          char* chars = new char[strLen + 1];
          chars[strLen] = 0;
          for (int ii=0; ii off + 3) ? ( arr[off+3]<<24&0xff000000 | arr[off+2]<<16&0xff0000
              | arr[off+1]<<8&0xff00 | arr[off]&0xFF ) : 0;
        } // end of LEW
    
        // toHexString -- Format attrResId as lowercase hexadecimal digits, with
        // no "0x" prefix and no zero padding.
        std::string toHexString(DWORD attrResId)
        {
            char text[20];
            sprintf_s(text, sizeof(text), "%lx", attrResId);
            return text;
        }
        // toIntString -- Return the decimal text of i (with a leading '-' for
        // negative values).  std::to_string is used so no format specifier has to
        // match the argument type and no fixed-size buffer is involved.
        std::string toIntString(int i)
        {
            return std::to_string(i);
        }
    };
    

提交回复
热议问题