How to parse the AndroidManifest.xml file inside an .apk package

前端 未结 16 1012
暖寄归人
暖寄归人 2020-11-22 10:32

This file appears to be in a binary XML format. What is this format and how can it be parsed programmatically (as opposed to using the aapt dump tool in the SDK)?

相关标签:
16条回答
  • 2020-11-22 10:48

    @Mathieu Kotlin version follows:

    /**
     * Prints the decoded AndroidManifest.xml of an APK to standard output.
     *
     * @param args optional; args[0] is the path of the APK to read. When no
     *             argument is given, "app.apk" in the working directory is used.
     */
    fun main(args : Array<String>) {
        val fileName = args.firstOrNull() ?: "app.apk"
        ZipFile(fileName).use { zip ->
            // Look the manifest up by name instead of walking every archive entry.
            val entry = zip.getEntry("AndroidManifest.xml")
            if (entry == null) {
                System.err.println("AndroidManifest.xml not found in $fileName")
                return
            }
            zip.getInputStream(entry).use { input ->
                val xml = decompressXML(input.readBytes())
                //TODO: parse the XML
                println(xml)
            }
        }
    }
    
        /** Tag word that marks the end of a binary XML document. */
        var endDocTag: Int = 0x00100101

        /** Tag word that opens an element (binary XML start tag). */
        var startTag: Int = 0x00100102

        /** Tag word that closes an element (binary XML end tag). */
        var endTag: Int = 0x00100103


        /**
         * Run of blanks that prtIndent() slices to build indentation.
         * Its length is the widest indentation prtIndent() can produce.
         */
        var spaces: String = "                                             "
    
    
        /**
         * Decode the 'compressed' binary form of an Android XML document
         * (such as the AndroidManifest.xml inside an .apk file) into text.
         * Source: http://stackoverflow.com/questions/2097813/how-to-parse-the-androidmanifest-xml-file-inside-an-apk-package/4761689#4761689
         *
         * The result holds one start or end tag per line, indented through
         * prtIndent() according to nesting depth. It is a readable dump rather
         * than strict XML: each end tag is followed by a "(line a-b)" note, and an
         * attribute whose value string index is -1 is shown as "resourceID 0x<hex>".
         *
         * Two diagnostics are written to standard output: a message when an
         * unknown tag code stops the scan, and the offset at which scanning ended.
         *
         * @param xml Encoded XML content to decompress
         * @return The decoded document as text, one tag per line
         */
        fun decompressXML(xml: ByteArray): String {

            val resultXml = StringBuilder()

            // Compressed XML file/bytes starts with 0x24 bytes of data,
            // 9 32 bit words in little endian order (LSB first):
            //   0th word is 03 00 08 00
            //   3rd word SEEMS TO BE:  Offset at the end of the StringTable
            //   4th word is: Number of strings in string table
            // WARNING: These notes refer to words sometimes in little endian
            //   storage format and sometimes in integer format (ie MSB first).
            val numbStrings = LEW(xml, 4 * 4)

            // StringIndexTable starts at offset 0x24, an array of 32 bit LE offsets
            // of the length/string data in the StringTable.
            val sitOff = 0x24  // Offset of start of StringIndexTable

            // StringTable, each string is represented with a 16 bit little endian
            // character count, followed by that number of 16 bit (LE) (Unicode) chars.
            val stOff = sitOff + numbStrings * 4  // StringTable follows StrIndexTable

            // XMLTags, The XML tag tree starts after some unknown content after the
            // StringTable. Start from the offset in the 3rd word and scan forward,
            // one word at a time, to the first start tag flag
            // (bytes 02 01 10 00, i.e. 0x00100102 as an integer).
            // If no start tag is found the offset from the 3rd word is kept.
            var xmlTagOff = LEW(xml, 3 * 4)
            for (ii in xmlTagOff until xml.size - 4 step 4) {
                if (LEW(xml, ii) == startTag) {
                    xmlTagOff = ii
                    break
                }
            } // end of hack, scanning for start of first start tag

            // XML tags and attributes:
            // Every XML start and end tag consists of 6 32 bit words:
            //   0th word: 02011000 for startTag and 03011000 for endTag
            //   1st word: a flag?, like 38000000
            //   2nd word: Line of where this tag appeared in the original source file
            //   3rd word: FFFFFFFF ??
            //   4th word: StringIndex of NameSpace name, or FFFFFFFF for default NS
            //   5th word: StringIndex of Element Name
            //   (Note: 01011000 in 0th word means end of XML document, endDocTag)

            // Start tags (not end tags) contain 3 more words:
            //   6th word: 14001400 meaning??
            //   7th word: Number of Attributes that follow this tag(follow word 8th)
            //   8th word: 00000000 meaning??

            // Attributes consist of 5 words:
            //   0th word: StringIndex of Attribute Name's Namespace, or FFFFFFFF
            //   1st word: StringIndex of Attribute Name
            //   2nd word: StringIndex of Attribute Value, or FFFFFFFF if ResourceId used
            //   3rd word: Flags?
            //   4th word: str ind of attr value again, or ResourceId of value

            // Step through the XML tree element tags and attributes
            var off = xmlTagOff
            var indent = 0
            var startTagLineNo = -2  // line of the most recently seen start tag
            tags@ while (off < xml.size) {
                val tag0 = LEW(xml, off)
                val lineNo = LEW(xml, off + 2 * 4)
                val nameSi = LEW(xml, off + 5 * 4)

                when (tag0) {
                    startTag -> { // XML START TAG
                        val numbAttrs = LEW(xml, off + 7 * 4)  // Number of Attributes to follow
                        off += 9 * 4  // Skip over 6+3 words of startTag data
                        val name = compXmlString(xml, sitOff, stOff, nameSi)
                        startTagLineNo = lineNo

                        // Collect the attributes that follow the start tag
                        val sb = StringBuilder()
                        repeat(numbAttrs) {
                            val attrNameSi = LEW(xml, off + 1 * 4)  // AttrName String Index
                            val attrValueSi = LEW(xml, off + 2 * 4) // AttrValue Str Ind, or FFFFFFFF
                            val attrResId = LEW(xml, off + 4 * 4)  // AttrValue ResourceId or dup AttrValue StrInd
                            off += 5 * 4  // Skip over the 5 words of an attribute

                            val attrName = compXmlString(xml, sitOff, stOff, attrNameSi)
                            val attrValue = if (attrValueSi != -1)
                                compXmlString(xml, sitOff, stOff, attrValueSi)
                            else
                                "resourceID 0x" + Integer.toHexString(attrResId)
                            sb.append(" $attrName=\"$attrValue\"")
                        }
                        // Terminate every tag line; without this the indented
                        // tags would run together on a single line.
                        resultXml.append(prtIndent(indent, "<$name$sb>")).append('\n')
                        indent++
                    }

                    endTag -> { // XML END TAG
                        indent--
                        off += 6 * 4  // Skip over 6 words of endTag data
                        val name = compXmlString(xml, sitOff, stOff, nameSi)
                        resultXml.append(prtIndent(indent, "</$name>  (line $startTagLineNo-$lineNo)"))
                            .append('\n')
                    }

                    endDocTag -> break@tags  // END OF XML DOC TAG

                    else -> {
                        // Unknown tag code: report it and stop, since its size is unknown
                        println("  Unrecognized tag code '" + Integer.toHexString(tag0)
                                + "' at offset " + off)
                        break@tags
                    }
                }
            } // end of while loop scanning tags and attributes of XML tree
            println("    end at offset $off")

            return resultXml.toString()
        } // end of decompressXML
    
    
        /**
         * Helper for decompressXML():
         * reads entry strInd of the string index table and returns the
         * string-table string that entry points at.
         * Source: http://stackoverflow.com/questions/2097813/how-to-parse-the-androidmanifest-xml-file-inside-an-apk-package/4761689#4761689
         *
         * @param xml Binary-formatted XML
         * @param sitOff Offset of the string index table within xml
         * @param stOff Offset of the string table within xml
         * @param strInd Index of the wanted string; a negative value means "no string"
         * @return The string at that index, or null when strInd is negative
         */
        fun compXmlString(xml: ByteArray, sitOff: Int, stOff: Int, strInd: Int): String? =
            if (strInd >= 0) {
                // Each index-table entry is a 32 bit offset relative to the string table
                val indexEntryOff = sitOff + strInd * 4
                compXmlStringAt(xml, stOff + LEW(xml, indexEntryOff))
            } else {
                null
            }
    
    
        /**
         * Helper for decompressXML():
         * prefixes a string with two blanks per indentation level.
         *
         * The prefix is cut from the shared `spaces` string, so it is never
         * wider than `spaces`. A negative level (possible when a malformed
         * document has more end tags than start tags) is treated as level 0
         * instead of raising an index error.
         *
         * @param indent Indentation level
         * @param str String to indent
         * @return Indented string
         */
        fun prtIndent(indent: Int, str: String): String {
            // take() already caps the width at spaces.length; only the lower bound needs clamping
            val width = (indent * 2).coerceAtLeast(0)
            return spaces.take(width) + str
        }
    
    
        /**
         * Helper for decompressXML():
         * returns the string stored in StringTable format at offset strOff.
         * This offset points to the 16 bit little endian string length, which
         * is followed by that number of 16 bit little endian (UTF-16) chars.
         *
         * Both length bytes are combined as unsigned values, so strings of 256
         * or more characters are read in full, and the characters are decoded
         * as UTF-16LE so non-ASCII text is preserved.
         *
         * @param arr StringTable array
         * @param strOff Offset to get string from
         * @return String from StringTable at offset strOff
         */
        fun compXmlStringAt(arr: ByteArray, strOff: Int): String {
            // Mask each byte before combining: Byte.toInt() sign-extends
            val lenLow = arr[strOff].toInt() and 0xff
            val lenHigh = arr[strOff + 1].toInt() and 0xff
            val strLen = (lenHigh shl 8) or lenLow
            // The character data starts right after the 2 length bytes, 2 bytes per char
            return String(arr, strOff + 2, strLen * 2, Charsets.UTF_16LE)
        } // end of compXmlStringAt
    
    
        /**
         * Read a Little Endian 32 bit word from a byte array.
         *
         * @param arr Byte array holding the word
         * @param off Offset of the word's first (least significant) byte
         * @return The four bytes at off..off+3 combined into an Int, with the
         *         byte at off as the lowest 8 bits
         */
        fun LEW(arr: ByteArray, off: Int): Int {
            // Mask after widening so a byte >= 0x80 does not sign-extend into higher bits
            val b3 = arr[off + 3].toInt() and 0xFF
            val b2 = arr[off + 2].toInt() and 0xFF
            val b1 = arr[off + 1].toInt() and 0xFF
            val b0 = arr[off].toInt() and 0xFF
            return (b3 shl 24) or (b2 shl 16) or (b1 shl 8) or b0
        } // end of LEW
    
        /** Widens the byte to an Int (sign-extending) and shifts it left by i bits. */
        private infix fun Byte.shl(i: Int): Int {
            val widened: Int = toInt()
            return widened shl i
        }

        /** Int counterpart with the same shape; it forwards to the built-in Int shift. */
        private infix fun Int.shl(i: Int): Int {
            return this shl i
        }
    

    This is a kotlin version of the answer above.

    0 讨论(0)
  • 2020-11-22 10:53

    apk-parser (https://github.com/caoqianli/apk-parser) is a lightweight Java library with no dependency on aapt or other binaries; it can parse binary XML files and extract other APK information.

    ApkParser apkParser = new ApkParser(new File(filePath));
    try {
        // Set a locale to translate resource tags into the strings for that language:
        // with Locale.ENGLISH you get the apk title 'WeChat' instead of '@string/app_name', for example.
        apkParser.setPreferredLocale(locale);

        // Decoded AndroidManifest.xml as text
        String xml = apkParser.getManifestXml();
        System.out.println(xml);

        // Any other binary XML file inside the APK, addressed by its path in the archive
        String xml2 = apkParser.transBinaryXml(xmlPathInApk);
        System.out.println(xml2);

        ApkMeta apkMeta = apkParser.getApkMeta();
        System.out.println(apkMeta);

        Set<Locale> locales = apkParser.getLocales();
        for (Locale l : locales) {
            System.out.println(l);
        }
    } finally {
        // close() must run even if one of the calls above throws
        apkParser.close();
    }
    
    0 讨论(0)
  • 2020-11-22 10:54

    With the latest SDK-Tools, you can now use a tool called the apkanalyzer to print out the AndroidManifest.xml of an APK (as well as other parts, such as resources).

    [android sdk]/tools/bin/apkanalyzer manifest print [app.apk]

    apkanalyzer

    0 讨论(0)
  • 2020-11-22 10:55

    In Android Studio 2.2 you can analyze the APK directly: go to Build > Analyze APK, select the APK, and open AndroidManifest.xml to see its contents.

    0 讨论(0)
  • 2020-11-22 10:55

    apkanalyzer will be helpful

    @echo off
    
    ::##############################################################################
    ::##
    ::##  apkanalyzer start up script for Windows
    ::##
    ::##  converted by ewwink
    ::##
    ::##############################################################################
    
    ::Set APP_HOME to the SDK tools directory (hard-coded; edit to match your installation).
    ::SAVED records the starting directory; nothing below reads it.
    
    SET SAVED=%cd%
    SET APP_HOME=C:\android\sdk\tools
    SET APP_NAME="apkanalyzer"
    
    ::Add default JVM options here. You can also use JAVA_OPTS and APKANALYZER_OPTS to pass JVM options to this script.
    SET DEFAULT_JVM_OPTS=-Dcom.android.sdklib.toolsdir=%APP_HOME%
    
    SET CLASSPATH=%APP_HOME%\lib\dvlib-26.0.0-dev.jar;%APP_HOME%\lib\util-2.2.1.jar;%APP_HOME%\lib\jimfs-1.1.jar;%APP_HOME%\lib\annotations-13.0.jar;%APP_HOME%\lib\ddmlib-26.0.0-dev.jar;%APP_HOME%\lib\repository-26.0.0-dev.jar;%APP_HOME%\lib\sdk-common-26.0.0-dev.jar;%APP_HOME%\lib\kotlin-stdlib-1.1.3-2.jar;%APP_HOME%\lib\protobuf-java-3.0.0.jar;%APP_HOME%\lib\apkanalyzer-cli.jar;%APP_HOME%\lib\gson-2.3.jar;%APP_HOME%\lib\httpcore-4.2.5.jar;%APP_HOME%\lib\dexlib2-2.2.1.jar;%APP_HOME%\lib\commons-compress-1.12.jar;%APP_HOME%\lib\generator.jar;%APP_HOME%\lib\error_prone_annotations-2.0.18.jar;%APP_HOME%\lib\commons-codec-1.6.jar;%APP_HOME%\lib\kxml2-2.3.0.jar;%APP_HOME%\lib\httpmime-4.1.jar;%APP_HOME%\lib\annotations-12.0.jar;%APP_HOME%\lib\bcpkix-jdk15on-1.56.jar;%APP_HOME%\lib\jsr305-3.0.0.jar;%APP_HOME%\lib\explainer.jar;%APP_HOME%\lib\builder-model-3.0.0-dev.jar;%APP_HOME%\lib\baksmali-2.2.1.jar;%APP_HOME%\lib\j2objc-annotations-1.1.jar;%APP_HOME%\lib\layoutlib-api-26.0.0-dev.jar;%APP_HOME%\lib\jcommander-1.64.jar;%APP_HOME%\lib\commons-logging-1.1.1.jar;%APP_HOME%\lib\annotations-26.0.0-dev.jar;%APP_HOME%\lib\builder-test-api-3.0.0-dev.jar;%APP_HOME%\lib\animal-sniffer-annotations-1.14.jar;%APP_HOME%\lib\bcprov-jdk15on-1.56.jar;%APP_HOME%\lib\httpclient-4.2.6.jar;%APP_HOME%\lib\common-26.0.0-dev.jar;%APP_HOME%\lib\jopt-simple-4.9.jar;%APP_HOME%\lib\sdklib-26.0.0-dev.jar;%APP_HOME%\lib\apkanalyzer.jar;%APP_HOME%\lib\shared.jar;%APP_HOME%\lib\binary-resources.jar;%APP_HOME%\lib\guava-22.0.jar
    
    SET APP_ARGS=%*
    ::Collect all arguments for the java command, following the shell quoting and substitution rules
    SET APKANALYZER_OPTS=%DEFAULT_JVM_OPTS% -classpath %CLASSPATH% com.android.tools.apk.analyzer.ApkAnalyzerCli %APP_ARGS%
    
    ::Determine the Java command to use to start the JVM.
    ::"where" sets a non-zero errorlevel when java is not on the PATH;
    ::"if errorlevel 1" is true for any errorlevel of 1 or higher.
    SET JAVACMD="java"
    where %JAVACMD% >nul 2>nul
    if errorlevel 1 (
      echo ERROR: 'java' command could not be found in your PATH.
      echo Please set the 'java' variable in your environment to match the
      echo location of your Java installation.
      echo.
      exit /b 1
    )
    
    :: execute apkanalyzer
    
    %JAVACMD% %APKANALYZER_OPTS%
    

    original post https://stackoverflow.com/a/51905063/1383521

    0 讨论(0)
  • 2020-11-22 10:58

    What about calling the Android Asset Packaging Tool (aapt), from the Android SDK, from a Python (or any other) script?

    Through the aapt (http://elinux.org/Android_aapt), indeed, you can retrieve information about the .apk package and about its AndroidManifest.xml file. In particular, you can extract the values of individual elements of an .apk package through the 'dump' sub-command. For example, you can extract the user-permissions in the AndroidManifest.xml file inside an .apk package in this way:

    $ aapt dump permissions package.apk
    

    Where package.apk is your .apk package.

    Moreover, you can use Unix pipes to clean up the output. For example:

    $ aapt dump permissions package.apk | sed 1d | awk '{ print $NF }'
    

    Here is a Python script that does that programmatically:

    import os
    import subprocess
    
    # Current directory and file name:
    curpath = os.path.dirname( os.path.realpath(__file__) )
    filepath = os.path.join(curpath, "package.apk")
    
    # Extract the AndroidManifest.xml permissions.
    # aapt is started from an argument list, without a shell, so a path that
    # contains spaces or shell metacharacters is passed through unchanged.
    command = ["aapt", "dump", "permissions", filepath]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=None,
                               universal_newlines=True)
    output = process.communicate()[0]
    
    # Same filtering as `sed 1d | awk '{ print $NF }'`, done in Python:
    # drop the first line, then keep the last whitespace-separated field of each line.
    permissions = "\n".join(
        line.split()[-1] if line.split() else ""
        for line in output.splitlines()[1:]
    )
    
    # print(...) with a single argument behaves the same on Python 2 and 3
    print(permissions)
    

    In a similar fashion you can extract other information (e.g. package, app name, etc...) of the AndroidManifest.xml:

    def findBetween(s, prefix, suffix):
        """Return the text of s between prefix and the next suffix after it.
    
        Returns "" when prefix is not in s, or suffix does not occur after it.
        """
        try:
            start = s.index(prefix) + len(prefix)
            end = s.index(suffix, start)
            return s[start:end]
        except ValueError:
            return ""
    
    
    # Extract the APK package info.
    # Relies on `subprocess` and `filepath` from the previous snippet.
    # findBetween is defined above because the loop below calls it immediately.
    shellcommand = ["aapt", "dump", "badging", filepath]
    process = subprocess.Popen(shellcommand, stdout=subprocess.PIPE, stderr=None,
                               universal_newlines=True)
    apkInfo = process.communicate()[0].splitlines()
    
    for info in apkInfo:
        # Package info:
        if "package:" in info:
            print("App Package: " + findBetween(info, "name='", "'"))
            print("App Version: " + findBetween(info, "versionName='", "'"))
            continue
    
        # App name:
        if "application:" in info:
            print("App Name: " + findBetween(info, "label='", "'"))
            continue
    

    If instead you want to parse the entire AndroidManifest XML tree, you can do that in a similar way using the xmltree command:

    aapt dump xmltree package.apk AndroidManifest.xml
    

    Using Python as before:

    # Extract the AndroidManifest XML tree.
    # Relies on `subprocess` and `filepath` from the first snippet.
    shellcommand = ["aapt", "dump", "xmltree", filepath, "AndroidManifest.xml"]
    process = subprocess.Popen(shellcommand, stdout=subprocess.PIPE, stderr=None,
                               universal_newlines=True)
    xmlTree = process.communicate()[0]
    
    
    def _countElements(tree, tag):
        """Count the element lines for `tag` in an `aapt dump xmltree` listing.
    
        Element lines have the form "E: <tag> (line=N)". Matching on that form
        avoids counting attribute values that merely contain the tag name, or
        longer tags such as "activity-alias".
        """
        marker = "E: " + tag + " "
        return sum(1 for line in tree.splitlines() if line.strip().startswith(marker))
    
    
    print("Number of Activities: " + str(_countElements(xmlTree, "activity")))
    print("Number of Services: " + str(_countElements(xmlTree, "service")))
    print("Number of BroadcastReceivers: " + str(_countElements(xmlTree, "receiver")))
    
    0 讨论(0)
提交回复
热议问题