Android Native Library Reverse Engineering: ARM Assembly to C++ Reconstruction
Mamoun Tarsha-Kurdi
8 min read
Introduction
Android native libraries (JNI) contain performance-critical code often implementing cryptography, DRM, and proprietary algorithms. Reversing these ARM/ARM64 binaries requires understanding JNI conventions, ARM calling standards, and C++ ABI internals.
ARM Architecture Fundamentals
ARM64 Registers and Calling Convention
General Purpose Registers:
x0-x7 : Function arguments / return values
x8 : Indirect result location
x9-x15 : Temporary registers
x16-x17 : Intra-procedure-call scratch registers
x18 : Platform register
x19-x28 : Callee-saved registers
x29 : Frame pointer (FP)
x30 : Link register (LR)
sp : Stack pointer
pc : Program counter (not a general-purpose register on AArch64; it cannot be named as an instruction operand)
Calling convention:
; Function: int add(int a, int b, int c, int d)
;
; Parameters:
; w0 = a (32-bit)
; w1 = b
; w2 = c
; w3 = d
;
; Return value in w0
;
; Leaf function: it calls nothing and touches no stack, so there is no
; prologue/epilogue saving x29/x30. All arithmetic uses the 32-bit w views.
add_function:
add w0, w0, w1 ; a + b
add w0, w0, w2 ; + c
add w0, w0, w3 ; + d
ret ; Return to caller (LR)
ARM64 Instruction Set
Common instructions in native libraries:
; Load/Store
ldr x0, [x1] ; Load 64-bit from [x1] into x0
ldr w0, [x1, #4] ; Load 32-bit from [x1+4] into w0
str x0, [x1] ; Store x0 to [x1]
stp x29, x30, [sp, #-16]! ; Store pair, pre-indexed: sp -= 16, then store (push FP, LR)
ldp x29, x30, [sp], #16 ; Load pair, post-indexed: load, then sp += 16 (pop FP, LR)
; Arithmetic
add x0, x1, x2 ; x0 = x1 + x2
sub x0, x1, #0x10 ; x0 = x1 - 16
mul x0, x1, x2 ; x0 = x1 * x2
; Logic
and x0, x1, x2 ; x0 = x1 & x2
orr x0, x1, x2 ; x0 = x1 | x2
eor x0, x1, x2 ; x0 = x1 ^ x2
; Branches
b label ; Unconditional branch
bl function ; Branch with link (call): return address goes to x30
ret ; Return (branch to the address in x30)
cbz x0, label ; Compare and branch if zero
cbnz x0, label ; Compare and branch if not zero
; Comparisons
cmp x0, x1 ; Compare (sets flags)
b.eq label ; Branch if equal
b.ne label ; Branch if not equal
b.lt label ; Branch if less than (signed)
b.gt label ; Branch if greater than (signed)
JNI Function Analysis
JNI Naming Convention
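Java_<package>_<class>_<method> (dots in the package become underscores; a literal underscore in any name is escaped as _1, and overloaded native methods append __<argument signature>)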
Example:
Java package: com.example.crypto
Class: NativeCrypto
Method: decrypt
JNI function: Java_com_example_crypto_NativeCrypto_decrypt
JNI Function Signatures
// Basic JNI function (instance method: the second argument is the receiver)
JNIEXPORT jstring JNICALL
Java_com_example_Crypto_encrypt(
JNIEnv* env, // x0: JNI environment
jobject thiz, // x1: 'this' object
jstring input // x2: String parameter
);
// Static JNI function (the second argument is the declaring class instead)
JNIEXPORT jint JNICALL
Java_com_example_Crypto_initialize(
JNIEnv* env, // x0
jclass clazz // x1: Class object (not 'this')
);
// Multiple parameters: pointer-sized handles use x registers, 32-bit jint
// values use the w view of the next argument register
JNIEXPORT jbyteArray JNICALL
Java_com_example_Crypto_process(
JNIEnv* env, // x0
jobject thiz, // x1
jbyteArray data, // x2
jint length, // w3 (32-bit value in the low half of x3)
jint mode // w4 (32-bit value in the low half of x4)
);
Reversing JNI Functions in Ghidra
// Ghidra decompilation of JNI function (comments added by hand).
//
// JNIEnv is a pointer to a pointer to the JNINativeInterface function table,
// so every JNI call has the shape:
//     table = *(long*)env;  slot = *(code**)(table + index * 8);  (*slot)(env, ...)
// Offsets below are index * 8 for a 64-bit target.
//
// Note: the decompiled routine performs no NULL checks on the JNI results or
// on malloc(); that is reproduced as-is.
undefined8 Java_com_example_Crypto_decrypt(long env, long thiz, long encrypted) {
    long decrypted;
    int len;
    byte* enc_buf;
    byte* dec_buf;

    // GetArrayLength (index 171 -> offset 0x558)
    len = (**(code**)(*(long*)env + 0x558))(env, encrypted);

    // GetByteArrayElements (index 184 -> offset 0x5c0)
    enc_buf = (**(code**)(*(long*)env + 0x5c0))(env, encrypted, (long*)0x0);

    // Allocate output buffer
    dec_buf = (byte*)malloc(len);

    // Decrypt (custom algorithm)
    for (int i = 0; i < len; i++) {
        dec_buf[i] = enc_buf[i] ^ 0xAA; // Simple XOR
    }

    // ReleaseByteArrayElements (index 192 -> offset 0x600), mode 0
    (**(code**)(*(long*)env + 0x600))(env, encrypted, enc_buf, 0);

    // NewByteArray (index 176 -> offset 0x580)
    decrypted = (**(code**)(*(long*)env + 0x580))(env, len);

    // SetByteArrayRegion (index 208 -> offset 0x680)
    (**(code**)(*(long*)env + 0x680))(env, decrypted, 0, len, dec_buf);

    free(dec_buf);
    return decrypted;
}
JNIEnv Function Table
// JNINativeInterface structure (partial). The trailing numbers are each
// entry's index in the complete function table declared in jni.h; entries
// omitted here still occupy their slots.
struct JNINativeInterface {
void* reserved0;
// ... (many entries)
// Array operations
jsize (*GetArrayLength)(JNIEnv*, jarray); // Index 171
jobjectArray (*NewObjectArray)(JNIEnv*, jsize, jclass, jobject); // 172
jobject (*GetObjectArrayElement)(JNIEnv*, jobjectArray, jsize); // 173
// Byte array
jbyteArray (*NewByteArray)(JNIEnv*, jsize); // 176 (175 is NewBooleanArray)
// ... other primitive arrays
// Get primitive array elements
jboolean* (*GetBooleanArrayElements)(JNIEnv*, jbooleanArray, jboolean*); // 183
jbyte* (*GetByteArrayElements)(JNIEnv*, jbyteArray, jboolean*); // 184
// Release primitive array elements
void (*ReleaseBooleanArrayElements)(JNIEnv*, jbooleanArray, jboolean*, jint); // 191
void (*ReleaseByteArrayElements)(JNIEnv*, jbyteArray, jbyte*, jint); // 192
};
// Calculate offset
// GetByteArrayElements is at index 184
// Offset = 184 * sizeof(void*) = 184 * 8 = 0x5C0
C++ ABI Reverse Engineering
Name Demangling
# Mangled names in shared library
nm -D libnative.so | grep "_Z"
000000000001234 T _ZN7Example6Crypto7decryptERKNSt6stringE
# Demangle
c++filt _ZN7Example6Crypto7decryptERKNSt6stringE
# Output: Example::Crypto::decrypt(std::string const&)
Vtable Reconstruction
// C++ class with virtual methods.
// Having any virtual member gives each object a hidden vtable pointer; the
// virtual destructor contributes two vtable entries (D1 and D0 in the
// listing below), followed by encrypt and decrypt in declaration order.
class Crypto {
public:
virtual ~Crypto();
virtual int encrypt(const char* data, int len);
virtual int decrypt(const char* data, int len);
private:
uint8_t key[16]; // 16-byte key stored inline in the object
};
// Disassembly shows vtable
/*
.data:0000000000010000 vtable_Crypto:
.data:0000000000010000 dq offset _ZN6CryptoD1Ev ; destructor
.data:0000000000010008 dq offset _ZN6CryptoD0Ev ; deleting destructor
.data:0000000000010010 dq offset _ZN6Crypto7encryptEPKci ; encrypt
.data:0000000000010018 dq offset _ZN6Crypto7decryptEPKci ; decrypt
*/
// Ghidra struct: hand-written reconstruction of the object and its vtable.
// The vtable entries take the object as an explicit first argument, so the
// object type must be declared before the vtable type that refers to it.
struct Crypto;

// One function pointer per vtable slot, in slot order.
struct Crypto_vtable {
    void (*destructor)(Crypto*);               // slot 0: complete-object destructor (D1)
    void (*deleting_destructor)(Crypto*);      // slot 1: deleting destructor (D0)
    int (*encrypt)(Crypto*, const char*, int); // slot 2
    int (*decrypt)(Crypto*, const char*, int); // slot 3
};

// Object layout: vtable pointer first, then the data members.
struct Crypto {
    Crypto_vtable* vtable; // Offset 0
    uint8_t key[16];       // Offset 8
};
Virtual Function Call Analysis
; C++ code: obj->decrypt(data, len)
;
; Disassembly:
ldr x8, [x0] ; Load vtable pointer from object
ldr x9, [x8, #0x18] ; Load decrypt function pointer (offset 0x18)
blr x9 ; Call decrypt
; Reconstruct:
; 1. x0 = this pointer (Crypto object)
; 2. vtable at offset 0
; 3. decrypt() at vtable offset 0x18 (4th slot, zero-based index 3: the virtual destructor occupies slots 0 and 1, encrypt is slot 2)
Static Analysis Workflow
IDA Pro Analysis
# IDA Python script: analyze_jni.py
import idaapi
import idc
import idautils
def find_jni_functions():
    """Collect every function whose name carries the JNI export prefix.

    Returns:
        A list of (start address, function name) tuples, in the order
        idautils.Functions() yields them.
    """
    named = ((ea, idc.get_func_name(ea)) for ea in idautils.Functions())
    return [(ea, name) for ea, name in named if name.startswith("Java_")]
def parse_jni_name(jni_name):
    """Split a mangled JNI export name into package, class and method.

    Follows the JNI name-mangling rules:
      * the name starts with "Java_" (only that leading prefix is removed);
      * "_" separates package components, class and method;
      * "_1" encodes a literal "_", "_2" a ";", "_3" a "[";
      * "_0xxxx" encodes the UTF-16 code unit with hex value xxxx
        (for example "_00024" is "$" in nested class names);
      * overloaded natives append "__" plus the mangled argument
        signature, which is discarded here.

    Args:
        jni_name: Export name such as "Java_com_example_Class_method".

    Returns:
        dict with keys 'package' (dotted), 'class' and 'method'. Missing
        components are returned as empty strings rather than raising.
    """
    prefix = "Java_"
    mangled = jni_name[len(prefix):] if jni_name.startswith(prefix) else jni_name

    # A double underscore can only be the overload-signature separator,
    # because underscores inside names are always written as "_1".
    mangled = mangled.partition("__")[0]

    escapes = {'1': '_', '2': ';', '3': '['}
    parts = []
    for piece in mangled.split("_"):
        # Java identifiers never start with a digit, so a piece that starts
        # with 0-3 is the tail of an escape and belongs to the previous part.
        if parts and piece[:1] in escapes:
            parts[-1] += escapes[piece[0]] + piece[1:]
        elif parts and piece[:1] == '0' and len(piece) >= 5:
            try:
                parts[-1] += chr(int(piece[1:5], 16)) + piece[5:]
            except ValueError:
                # Not four hex digits: keep the text rather than guess.
                parts.append(piece)
        else:
            parts.append(piece)

    method = parts[-1] if parts else ''
    class_name = parts[-2] if len(parts) >= 2 else ''
    package = ".".join(parts[:-2])
    return {
        'package': package,
        'class': class_name,
        'method': method
    }
def analyze_jni_function(func_ea):
    """Report the indirect calls inside one JNI function.

    On ARM64 the arguments arrive in x0-x7:
        x0  = JNIEnv*
        x1  = jobject/jclass
        x2+ = the Java-visible parameters
    Calls through the JNIEnv function table are indirect, so every BLR
    instruction in the body is printed as a likely JNI call site.

    Args:
        func_ea: Any address inside the function to analyze.
    """
    func = idaapi.get_func(func_ea)
    if func is None:
        # get_func() returns None when the address is not inside a function.
        print(f"[!] No function at {hex(func_ea)}")
        return

    print(f"[*] Analyzing {idc.get_func_name(func_ea)}")

    # Find JNI API calls
    for head in idautils.Heads(func.start_ea, func.end_ea):
        if idc.print_insn_mnem(head) == "BLR":
            # Indirect call (likely JNI function)
            print(f" JNI call at {hex(head)}")
if __name__ == "__main__":
    # Walk every JNI export: print its Java-side name, then its call sites.
    for func_ea, jni_name in find_jni_functions():
        info = parse_jni_name(jni_name)
        print(f"\n{info['package']}.{info['class']}.{info['method']}()")
        analyze_jni_function(func_ea)
Ghidra Scripting
// Ghidra script: JNI_Analyzer.java
import ghidra.app.script.GhidraScript;
import ghidra.program.model.listing.*;
import ghidra.program.model.symbol.*;
public class JNI_Analyzer extends GhidraScript {
@Override
public void run() throws Exception {
SymbolTable symTable = currentProgram.getSymbolTable();
for (Symbol sym : symTable.getAllSymbols(true)) {
String name = sym.getName();
if (name.startsWith("Java_")) {
analyzeJNIFunction(sym.getAddress(), name);
}
}
}
private void analyzeJNIFunction(Address addr, String name) {
println("\\n[*] " + name);
Function func = getFunctionAt(addr);
if (func == null) {
println(" Not a function");
return;
}
// Set function signature
// JNIEXPORT <type> JNICALL name(JNIEnv*, jobject, ...)
String signature = "undefined8 " + name + "(long env, long thiz)";
println(" Signature: " + signature);
// TODO: Parse parameters from Java signature
}
}
Dynamic Analysis Techniques
Frida Hooking
// Hook JNI function and dump its jbyteArray argument and return value.
'use strict';

const moduleName = "libnative.so";
const jniFunc = "Java_com_example_Crypto_decrypt";

// Slot indices in the JNINativeInterface function table (jni.h order).
const JNI_INDEX = {
    GetArrayLength: 171,
    GetByteArrayElements: 184,
    ReleaseByteArrayElements: 192
};

// Release mode: free the native buffer without copying it back.
const JNI_ABORT = 2;

/**
 * Build a callable for one JNIEnv function.
 *
 * JNIEnv* points at a pointer to the function table, so the table address
 * has to be loaded from *env before a slot can be indexed.
 */
function jniFunction(env, index, retType, argTypes) {
    const table = env.readPointer();
    const entry = table.add(index * Process.pointerSize).readPointer();
    return new NativeFunction(entry, retType, argTypes);
}

/**
 * Print a heading followed by a hexdump of the array contents.
 * The length is queried from the array itself, and the element buffer is
 * released again so the hook does not leak or pin it.
 */
function dumpByteArray(env, array, heading) {
    console.log(heading);
    if (array.isNull()) {
        console.log("    (null array)");
        return;
    }

    const GetArrayLength = jniFunction(
        env, JNI_INDEX.GetArrayLength, 'int', ['pointer', 'pointer']);
    const GetByteArrayElements = jniFunction(
        env, JNI_INDEX.GetByteArrayElements, 'pointer', ['pointer', 'pointer', 'pointer']);
    const ReleaseByteArrayElements = jniFunction(
        env, JNI_INDEX.ReleaseByteArrayElements, 'void', ['pointer', 'pointer', 'pointer', 'int']);

    const len = GetArrayLength(env, array);
    const buf = GetByteArrayElements(env, array, NULL);
    if (buf.isNull()) {
        console.log("    (GetByteArrayElements failed)");
        return;
    }

    console.log(hexdump(buf, { length: len }));
    ReleaseByteArrayElements(env, array, buf, JNI_ABORT);
}

const addr = Module.findExportByName(moduleName, jniFunc);
if (addr === null) {
    // Interceptor.attach() throws on a null target; the library may simply
    // not be loaded yet when the script starts.
    console.log(`[!] ${jniFunc} not found in ${moduleName}`);
} else {
    console.log(`[*] ${jniFunc} at ${addr}`);

    Interceptor.attach(addr, {
        onEnter: function(args) {
            // args[0] = JNIEnv*
            // args[1] = jobject thiz
            // args[2] = jbyteArray encrypted
            this.env = args[0]; // Save for onLeave
            dumpByteArray(args[0], args[2], "[*] Encrypted data:");
        },
        onLeave: function(retval) {
            // retval = jbyteArray (decrypted)
            dumpByteArray(this.env, retval, "[*] Decrypted data:");
        }
    });
}
Automated Reconstruction
Decompiler Output Cleanup
#!/usr/bin/env python3
"""
Clean up Ghidra decompiler output for readability
"""
import re
def cleanup_ghidra_code(code, pointer_size=8):
    """Transform Ghidra output to readable C.

    Three rewrites are applied, in this order:
      1. Ghidra's placeholder types (undefined, undefined1/2/4/8) become
         fixed-width integer types. Only whole words are replaced, so
         identifiers that merely contain "undefined" are left alone.
      2. Calls through the JNIEnv function table, written as
         *(code**)(*(long*)env + 0xNNN), are replaced by the JNI function
         name for that slot. Unknown offsets are left untouched.
      3. The null-pointer cast (long*)0x0 becomes NULL.

    Args:
        code: Decompiler output as a single string.
        pointer_size: Size in bytes of one function-table slot; 8 for
            64-bit targets (default), 4 for 32-bit ones.

    Returns:
        The rewritten source text.
    """
    # Replace undefined types
    undefined_types = {
        'undefined8': 'uint64_t',
        'undefined4': 'uint32_t',
        'undefined2': 'uint16_t',
        'undefined1': 'uint8_t',
        'undefined': 'uint8_t',
    }
    code = re.sub(r'\bundefined[1248]?\b',
                  lambda m: undefined_types[m.group(0)], code)

    # Replace JNI function table accesses.
    # Keys are slot indices in JNINativeInterface (jni.h order); the byte
    # offset seen in the decompilation is index * pointer_size.
    jni_functions = {
        171: 'GetArrayLength',
        176: 'NewByteArray',
        184: 'GetByteArrayElements',
        192: 'ReleaseByteArrayElements',
        208: 'SetByteArrayRegion',
    }

    def name_jni_slot(match):
        index, remainder = divmod(int(match.group(1), 16), pointer_size)
        if remainder == 0 and index in jni_functions:
            return jni_functions[index]
        return match.group(0)

    # Whitespace-tolerant: Ghidra prints "(code **)" and "(long *)".
    table_access = (r'\*\(\s*code\s*\*\*\s*\)'
                    r'\(\s*\*\(\s*long\s*\*\s*\)env\s*\+\s*0x([0-9a-fA-F]+)\s*\)')
    code = re.sub(table_access, name_jni_slot, code)

    # Clean up casts (the trailing \b keeps 0x01 and similar intact)
    code = re.sub(r'\(\s*long\s*\*\s*\)0x0\b', 'NULL', code)
    return code
if __name__ == '__main__':
    # Read the decompiler dump; the context manager closes the file even
    # if read() raises.
    with open('decompiled.c') as source_file:
        ghidra_output = source_file.read()
    cleaned = cleanup_ghidra_code(ghidra_output)
    print(cleaned)
Conclusion
Android native library reverse engineering combines ARM architecture knowledge, JNI internals understanding, and C++ ABI familiarity. Systematic analysis using IDA/Ghidra for static analysis and Frida for dynamic instrumentation enables reconstruction of proprietary algorithms and discovery of vulnerabilities in compiled code.
References
- ARM (2022). “ARM Architecture Reference Manual”
- Oracle (2023). “Java Native Interface Specification”
- Itanium (2023). “Itanium C++ ABI”
- Google (2023). “Android NDK Documentation”