This file appears to be in a binary XML format. What is this format and how can it be parsed programmatically (as opposed to using the aapt dump tool in the SDK)?
@Mathieu Kotlin version follows:
fun main(args : Array<String>) {
val fileName = "app.apk"
ZipFile(fileName).use { zip ->
zip.entries().asSequence().forEach { entry ->
if(entry.name == "AndroidManifest.xml") {
zip.getInputStream(entry).use { input ->
val xml = decompressXML(input.readBytes())
//TODO: parse the XML
println(xml)
}
}
}
}
}
/**
* Binary XML doc ending Tag
*/
var endDocTag = 0x00100101
/**
* Binary XML start Tag
*/
var startTag = 0x00100102
/**
* Binary XML end Tag
*/
var endTag = 0x00100103
/**
* Reference var for spacing
* Used in prtIndent()
*/
var spaces = " "
/**
* Parse the 'compressed' binary form of Android XML docs
* such as for AndroidManifest.xml in .apk files
* Source: http://stackoverflow.com/questions/2097813/how-to-parse-the-androidmanifest-xml-file-inside-an-apk-package/4761689#4761689
*
* @param xml Encoded XML content to decompress
*/
fun decompressXML(xml: ByteArray): String {
val resultXml = StringBuilder()
// Compressed XML file/bytes starts with 24x bytes of data,
// 9 32 bit words in little endian order (LSB first):
// 0th word is 03 00 08 00
// 3rd word SEEMS TO BE: Offset at then of StringTable
// 4th word is: Number of strings in string table
// WARNING: Sometime I indiscriminently display or refer to word in
// little endian storage format, or in integer format (ie MSB first).
val numbStrings = LEW(xml, 4 * 4)
// StringIndexTable starts at offset 24x, an array of 32 bit LE offsets
// of the length/string data in the StringTable.
val sitOff = 0x24 // Offset of start of StringIndexTable
// StringTable, each string is represented with a 16 bit little endian
// character count, followed by that number of 16 bit (LE) (Unicode) chars.
val stOff = sitOff + numbStrings * 4 // StringTable follows StrIndexTable
// XMLTags, The XML tag tree starts after some unknown content after the
// StringTable. There is some unknown data after the StringTable, scan
// forward from this point to the flag for the start of an XML start tag.
var xmlTagOff = LEW(xml, 3 * 4) // Start from the offset in the 3rd word.
// Scan forward until we find the bytes: 0x02011000(x00100102 in normal int)
run {
var ii = xmlTagOff
while (ii < xml.size - 4) {
if (LEW(xml, ii) == startTag) {
xmlTagOff = ii
break
}
ii += 4
}
} // end of hack, scanning for start of first start tag
// XML tags and attributes:
// Every XML start and end tag consists of 6 32 bit words:
// 0th word: 02011000 for startTag and 03011000 for endTag
// 1st word: a flag?, like 38000000
// 2nd word: Line of where this tag appeared in the original source file
// 3rd word: FFFFFFFF ??
// 4th word: StringIndex of NameSpace name, or FFFFFFFF for default NS
// 5th word: StringIndex of Element Name
// (Note: 01011000 in 0th word means end of XML document, endDocTag)
// Start tags (not end tags) contain 3 more words:
// 6th word: 14001400 meaning??
// 7th word: Number of Attributes that follow this tag(follow word 8th)
// 8th word: 00000000 meaning??
// Attributes consist of 5 words:
// 0th word: StringIndex of Attribute Name's Namespace, or FFFFFFFF
// 1st word: StringIndex of Attribute Name
// 2nd word: StringIndex of Attribute Value, or FFFFFFF if ResourceId used
// 3rd word: Flags?
// 4th word: str ind of attr value again, or ResourceId of value
// TMP, dump string table to tr for debugging
//tr.addSelect("strings", null);
//for (int ii=0; ii<numbStrings; ii++) {
// // Length of string starts at StringTable plus offset in StrIndTable
// String str = compXmlString(xml, sitOff, stOff, ii);
// tr.add(String.valueOf(ii), str);
//}
//tr.parent();
// Step through the XML tree element tags and attributes
var off = xmlTagOff
var indent = 0
var startTagLineNo = -2
while (off < xml.size) {
val tag0 = LEW(xml, off)
//int tag1 = LEW(xml, off+1*4);
val lineNo = LEW(xml, off + 2 * 4)
//int tag3 = LEW(xml, off+3*4);
val nameNsSi = LEW(xml, off + 4 * 4)
val nameSi = LEW(xml, off + 5 * 4)
if (tag0 == startTag) { // XML START TAG
val tag6 = LEW(xml, off + 6 * 4) // Expected to be 14001400
val numbAttrs = LEW(xml, off + 7 * 4) // Number of Attributes to follow
//int tag8 = LEW(xml, off+8*4); // Expected to be 00000000
off += 9 * 4 // Skip over 6+3 words of startTag data
val name = compXmlString(xml, sitOff, stOff, nameSi)
//tr.addSelect(name, null);
startTagLineNo = lineNo
// Look for the Attributes
val sb = StringBuffer()
for (ii in 0 until numbAttrs) {
val attrNameNsSi = LEW(xml, off) // AttrName Namespace Str Ind, or FFFFFFFF
val attrNameSi = LEW(xml, off + 1 * 4) // AttrName String Index
val attrValueSi = LEW(xml, off + 2 * 4) // AttrValue Str Ind, or FFFFFFFF
val attrFlags = LEW(xml, off + 3 * 4)
val attrResId = LEW(xml, off + 4 * 4) // AttrValue ResourceId or dup AttrValue StrInd
off += 5 * 4 // Skip over the 5 words of an attribute
val attrName = compXmlString(xml, sitOff, stOff, attrNameSi)
val attrValue = if (attrValueSi != -1)
compXmlString(xml, sitOff, stOff, attrValueSi)
else
"resourceID 0x" + Integer.toHexString(attrResId)
sb.append(" $attrName=\"$attrValue\"")
//tr.add(attrName, attrValue);
}
resultXml.append(prtIndent(indent, "<$name$sb>"))
indent++
} else if (tag0 == endTag) { // XML END TAG
indent--
off += 6 * 4 // Skip over 6 words of endTag data
val name = compXmlString(xml, sitOff, stOff, nameSi)
resultXml.append(prtIndent(indent, "</$name> (line $startTagLineNo-$lineNo)"))
//tr.parent(); // Step back up the NobTree
} else if (tag0 == endDocTag) { // END OF XML DOC TAG
break
} else {
println(" Unrecognized tag code '" + Integer.toHexString(tag0)
+ "' at offset " + off
)
break
}
} // end of while loop scanning tags and attributes of XML tree
println(" end at offset $off")
return resultXml.toString()
} // end of decompressXML
/**
* Tool Method for decompressXML();
* Compute binary XML to its string format
* Source: Source: http://stackoverflow.com/questions/2097813/how-to-parse-the-androidmanifest-xml-file-inside-an-apk-package/4761689#4761689
*
* @param xml Binary-formatted XML
* @param sitOff
* @param stOff
* @param strInd
* @return String-formatted XML
*/
fun compXmlString(xml: ByteArray, sitOff: Int, stOff: Int, strInd: Int): String? {
if (strInd < 0) return null
val strOff = stOff + LEW(xml, sitOff + strInd * 4)
return compXmlStringAt(xml, strOff)
}
/**
* Tool Method for decompressXML();
* Apply indentation
*
* @param indent Indentation level
* @param str String to indent
* @return Indented string
*/
fun prtIndent(indent: Int, str: String): String {
return spaces.substring(0, Math.min(indent * 2, spaces.length)) + str
}
/**
* Tool method for decompressXML()
* Return the string stored in StringTable format at
* offset strOff. This offset points to the 16 bit string length, which
* is followed by that number of 16 bit (Unicode) chars.
*
* @param arr StringTable array
* @param strOff Offset to get string from
* @return String from StringTable at offset strOff
*/
fun compXmlStringAt(arr: ByteArray, strOff: Int): String {
val strLen = (arr[strOff + 1] shl (8 and 0xff00)) or (arr[strOff].toInt() and 0xff)
val chars = ByteArray(strLen)
for (ii in 0 until strLen) {
chars[ii] = arr[strOff + 2 + ii * 2]
}
return String(chars) // Hack, just use 8 byte chars
} // end of compXmlStringAt
/**
* Return value of a Little Endian 32 bit word from the byte array
* at offset off.
*
* @param arr Byte array with 32 bit word
* @param off Offset to get word from
* @return Value of Little Endian 32 bit word specified
*/
fun LEW(arr: ByteArray, off: Int): Int {
return (arr[off + 3] shl 24 and -0x1000000 or ((arr[off + 2] shl 16) and 0xff0000)
or (arr[off + 1] shl 8 and 0xff00) or (arr[off].toInt() and 0xFF))
} // end of LEW
private infix fun Byte.shl(i: Int): Int = (this.toInt() shl i)
private infix fun Int.shl(i: Int): Int = (this shl i)
This is a kotlin version of the answer above.
apk-parser, https://github.com/caoqianli/apk-parser, a lightweight impl for java, with no dependency for aapt or other binarys, is good for parse binary xml files, and other apk infos.
ApkParser apkParser = new ApkParser(new File(filePath));
// set a locale to translate resource tag into specific strings in language the locale specified, you set locale to Locale.ENGLISH then get apk title 'WeChat' instead of '@string/app_name' for example
apkParser.setPreferredLocale(locale);
String xml = apkParser.getManifestXml();
System.out.println(xml);
String xml2 = apkParser.transBinaryXml(xmlPathInApk);
System.out.println(xml2);
ApkMeta apkMeta = apkParser.getApkMeta();
System.out.println(apkMeta);
Set<Locale> locales = apkParser.getLocales();
for (Locale l : locales) {
System.out.println(l);
}
apkParser.close();
With the latest SDK-Tools, you can now use a tool called the apkanalyzer to print out the AndroidManifest.xml of an APK (as well as other parts, such as resources).
[android sdk]/tools/bin/apkanalyzer manifest print [app.apk]
apkanalyzer
In Android studio 2.2 you can directly analyze the apk. Goto build- analyze apk. Select the apk, navigate to androidmanifest.xml. You can see the details of androidmanifest.
apkanalyzer will be helpful
@echo off
::##############################################################################
::##
::## apkanalyzer start up script for Windows
::##
::## converted by ewwink
::##
::##############################################################################
::Attempt to set APP_HOME
SET SAVED=%cd%
SET APP_HOME=C:\android\sdk\tools
SET APP_NAME="apkanalyzer"
::Add default JVM options here. You can also use JAVA_OPTS and APKANALYZER_OPTS to pass JVM options to this script.
SET DEFAULT_JVM_OPTS=-Dcom.android.sdklib.toolsdir=%APP_HOME%
SET CLASSPATH=%APP_HOME%\lib\dvlib-26.0.0-dev.jar;%APP_HOME%\lib\util-2.2.1.jar;%APP_HOME%\lib\jimfs-1.1.jar;%APP_HOME%\lib\annotations-13.0.jar;%APP_HOME%\lib\ddmlib-26.0.0-dev.jar;%APP_HOME%\lib\repository-26.0.0-dev.jar;%APP_HOME%\lib\sdk-common-26.0.0-dev.jar;%APP_HOME%\lib\kotlin-stdlib-1.1.3-2.jar;%APP_HOME%\lib\protobuf-java-3.0.0.jar;%APP_HOME%\lib\apkanalyzer-cli.jar;%APP_HOME%\lib\gson-2.3.jar;%APP_HOME%\lib\httpcore-4.2.5.jar;%APP_HOME%\lib\dexlib2-2.2.1.jar;%APP_HOME%\lib\commons-compress-1.12.jar;%APP_HOME%\lib\generator.jar;%APP_HOME%\lib\error_prone_annotations-2.0.18.jar;%APP_HOME%\lib\commons-codec-1.6.jar;%APP_HOME%\lib\kxml2-2.3.0.jar;%APP_HOME%\lib\httpmime-4.1.jar;%APP_HOME%\lib\annotations-12.0.jar;%APP_HOME%\lib\bcpkix-jdk15on-1.56.jar;%APP_HOME%\lib\jsr305-3.0.0.jar;%APP_HOME%\lib\explainer.jar;%APP_HOME%\lib\builder-model-3.0.0-dev.jar;%APP_HOME%\lib\baksmali-2.2.1.jar;%APP_HOME%\lib\j2objc-annotations-1.1.jar;%APP_HOME%\lib\layoutlib-api-26.0.0-dev.jar;%APP_HOME%\lib\jcommander-1.64.jar;%APP_HOME%\lib\commons-logging-1.1.1.jar;%APP_HOME%\lib\annotations-26.0.0-dev.jar;%APP_HOME%\lib\builder-test-api-3.0.0-dev.jar;%APP_HOME%\lib\animal-sniffer-annotations-1.14.jar;%APP_HOME%\lib\bcprov-jdk15on-1.56.jar;%APP_HOME%\lib\httpclient-4.2.6.jar;%APP_HOME%\lib\common-26.0.0-dev.jar;%APP_HOME%\lib\jopt-simple-4.9.jar;%APP_HOME%\lib\sdklib-26.0.0-dev.jar;%APP_HOME%\lib\apkanalyzer.jar;%APP_HOME%\lib\shared.jar;%APP_HOME%\lib\binary-resources.jar;%APP_HOME%\lib\guava-22.0.jar
SET APP_ARGS=%*
::Collect all arguments for the java command, following the shell quoting and substitution rules
SET APKANALYZER_OPTS=%DEFAULT_JVM_OPTS% -classpath %CLASSPATH% com.android.tools.apk.analyzer.ApkAnalyzerCli %APP_ARGS%
::Determine the Java command to use to start the JVM.
SET JAVACMD="java"
where %JAVACMD% >nul 2>nul
if %errorlevel%==1 (
echo ERROR: 'java' command could be found in your PATH.
echo Please set the 'java' variable in your environment to match the
echo location of your Java installation.
echo.
exit /b 0
)
:: execute apkanalyzer
%JAVACMD% %APKANALYZER_OPTS%
original post https://stackoverflow.com/a/51905063/1383521
What about using the Android Asset Packaging Tool (aapt), from the Android SDK, into a Python (or whatever) script?
Through the aapt (http://elinux.org/Android_aapt), indeed, you can retrieve information about the .apk package and about its AndroidManifest.xml file. In particular, you can extract the values of individual elements of an .apk package through the 'dump' sub-command. For example, you can extract the user-permissions in the AndroidManifest.xml file inside an .apk package in this way:
$ aapt dump permissions package.apk
Where package.apk is your .apk package.
Moreover, you can use the Unix pipe command to clear the output. For example:
$ aapt dump permissions package.apk | sed 1d | awk '{ print $NF }'
Here a Python script that to that programmatically:
import os
import subprocess
#Current directory and file name:
curpath = os.path.dirname( os.path.realpath(__file__) )
filepath = os.path.join(curpath, "package.apk")
#Extract the AndroidManifest.xml permissions:
command = "aapt dump permissions " + filepath + " | sed 1d | awk '{ print $NF }'"
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=None, shell=True)
permissions = process.communicate()[0]
print permissions
In a similar fashion you can extract other information (e.g. package, app name, etc...) of the AndroidManifest.xml:
#Extract the APK package info:
shellcommand = "aapt dump badging " + filepath
process = subprocess.Popen(shellcommand, stdout=subprocess.PIPE, stderr=None, shell=True)
apkInfo = process.communicate()[0].splitlines()
for info in apkInfo:
#Package info:
if string.find(info, "package:", 0) != -1:
print "App Package: " + findBetween(info, "name='", "'")
print "App Version: " + findBetween(info, "versionName='", "'")
continue
#App name:
if string.find(info, "application:", 0) != -1:
print "App Name: " + findBetween(info, "label='", "'")
continue
def findBetween(s, prefix, suffix):
try:
start = s.index(prefix) + len(prefix)
end = s.index(suffix, start)
return s[start:end]
except ValueError:
return ""
If instead you want to parse the entire AndroidManifest XML tree, you can do that in a similar way using the xmltree command:
aapt dump xmltree package.apk AndroidManifest.xml
Using Python as before:
#Extract the AndroidManifest XML tree:
shellcommand = "aapt dump xmltree " + filepath + " AndroidManifest.xml"
process = subprocess.Popen(shellcommand, stdout=subprocess.PIPE, stderr=None, shell=True)
xmlTree = process.communicate()[0]
print "Number of Activities: " + str(xmlTree.count("activity"))
print "Number of Services: " + str(xmlTree.count("service"))
print "Number of BroadcastReceivers: " + str(xmlTree.count("receiver"))