Android Obfuscation Analysis: ProGuard, R8, and Native Code Protection
Mamoun Tarsha-Kurdi
6 min read
Introduction
Android obfuscation techniques aim to hinder reverse engineering by renaming symbols, encrypting strings, flattening control flow, and packing native libraries. Understanding deobfuscation methods is essential for security analysis and malware research.
ProGuard/R8 Obfuscation
Symbol Renaming
// Original code (before obfuscation): package, class, field and method
// names are descriptive, so the intent is readable straight from the source.
package com.example.banking;
public class AccountManager {
    private String accountNumber;
    private double balance;
    // Debits `amount` from `balance` and forwards it to sendTransaction(),
    // but only when the balance covers the amount; otherwise nothing happens.
    // sendTransaction() itself is not part of this listing.
    public void transfer(String recipient, double amount) {
        if (balance >= amount) {
            balance -= amount;
            sendTransaction(recipient, amount);
        }
    }
}
// After ProGuard: the structure is unchanged, but every identifier has been
// replaced by a short meaningless one. Per the mapping file shown below,
// a.b.c.a is com.example.banking.AccountManager.
package a.b.c;
public class a {
    private String a; // was accountNumber
    private double b; // was balance
    // Was transfer(recipient, amount). The parameters are also named a and b,
    // which is why the field has to be reached through `this.b`.
    public void a(String a, double b) {
        if (this.b >= b) {
            this.b -= b;
            b(a, b); // was sendTransaction(recipient, amount)
        }
    }
}
Deobfuscation with Mapping File
# mapping.txt (ProGuard output)
com.example.banking.AccountManager -> a.b.c.a:
java.lang.String accountNumber -> a
double balance -> b
void transfer(java.lang.String,double) -> a
void sendTransaction(java.lang.String,double) -> b
Apply mapping:
#!/usr/bin/env python3
"""
Apply ProGuard mapping to decompiled code
"""
import re
def parse_mapping(mapping_file):
    """Parse a ProGuard/R8 mapping file.

    Args:
        mapping_file: Path of the ``mapping.txt`` produced by the shrinker.

    Returns:
        A dict keyed by obfuscated class name. Each value is a dict with:

        * ``'name'``    - the original fully qualified class name
        * ``'members'`` - obfuscated member name -> original name; when a
          field and a method share an obfuscated name the later line wins
        * ``'fields'``  - obfuscated field name -> original field name
        * ``'methods'`` - obfuscated method name -> original method name

    Raises:
        OSError: If the mapping file cannot be opened.
    """
    mappings = {}
    current_class = None
    with open(mapping_file, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            # Skip blanks, '#' header/comment lines and anything that is not
            # a "left -> right" mapping entry.
            if not line or line.startswith('#') or ' -> ' not in line:
                continue

            left, _, right = line.partition(' -> ')

            # Only class entries end with ':'. Testing for a ':' anywhere in
            # the line would misclassify members that carry line-number
            # ranges, e.g. "1:3:void transfer(...):10:12 -> a".
            if right.endswith(':'):
                current_class = right[:-1]
                mappings[current_class] = {
                    'name': left,
                    'members': {},
                    'fields': {},
                    'methods': {},
                }
                continue

            if current_class is None:
                # Member entry before any class header: nothing to attach to.
                continue

            member_sig = left.strip()
            obfuscated_name = right.strip()

            if '(' in member_sig:
                # Method: "[start:end:]return_type name(args)[:start:end]"
                declaration = member_sig.split(' ', 1)[-1]
                original_name = declaration.split('(')[0]
                kind = 'methods'
            else:
                # Field: "type name"
                original_name = member_sig.rsplit(' ', 1)[-1]
                kind = 'fields'

            entry = mappings[current_class]
            # Fields and methods live in separate namespaces, so the same
            # obfuscated name may legitimately appear in both.
            entry[kind][obfuscated_name] = original_name
            entry['members'][obfuscated_name] = original_name
    return mappings
def deobfuscate_code(code, mappings):
    """Apply parsed mappings to decompiled source text.

    The substitution is purely textual and global: every whole-word
    occurrence of an obfuscated class or member name is replaced, no matter
    which class the surrounding code belongs to. Very short obfuscated names
    can therefore also rewrite unrelated identifiers.

    Args:
        code: Decompiled source as a single string.
        mappings: Dict as returned by ``parse_mapping``; each value must
            provide ``'name'`` and ``'members'``.

    Returns:
        The source text with obfuscated names replaced by the originals.
    """
    for obf_class, info in mappings.items():
        orig_class = info['name']
        # r'\b' is the regex word boundary; a doubled backslash inside a raw
        # string would instead match a literal backslash followed by "b".
        # A callable replacement keeps re.sub from interpreting backslashes
        # or group references in the original name.
        code = re.sub(
            rf'\b{re.escape(obf_class)}\b',
            lambda _match, replacement=orig_class: replacement,
            code,
        )
        for obf_member, orig_member in info['members'].items():
            code = re.sub(
                rf'\b{re.escape(obf_member)}\b',
                lambda _match, replacement=orig_member: replacement,
                code,
            )
    return code
if __name__ == '__main__':
    # Script entry point: read mapping.txt and decompiled.java from the
    # current directory and print the deobfuscated source to stdout.
    mappings = parse_mapping('mapping.txt')
    # The context manager closes the file even if reading fails.
    with open('decompiled.java', encoding='utf-8') as source_file:
        decompiled = source_file.read()
    deobfuscated = deobfuscate_code(decompiled, mappings)
    print(deobfuscated)
String Encryption
Encrypted Strings Pattern
// Obfuscated code with encrypted strings: each literal is wrapped in a call
// to a (renamed) decoder helper, so the readable text never appears in the
// class file. The helpers b, c and d are not part of this listing.
public class a {
    public void a() {
        // Decodes to "The quick brown fox".
        String s1 = b.a("Gur dhvpx oebja sbk"); // ROT13 encrypted
        // The bytes are the ASCII codes of "Hello".
        String s2 = c.a(new byte[]{0x48, 0x65, 0x6c, 0x6c, 0x6f}); // Hex encoded
        // Decodes to "Hello World".
        String s3 = d.a("SGVsbG8gV29ybGQ="); // Base64
    }
}
Automated Decryption
#!/usr/bin/env python3
"""
Decrypt Android string obfuscation
"""
import re
import base64
# Translation table rotating ASCII letters by 13 places; built once at import.
_ROT13_LOWER = 'abcdefghijklmnopqrstuvwxyz'
_ROT13_UPPER = _ROT13_LOWER.upper()
_ROT13_TABLE = str.maketrans(
    _ROT13_LOWER + _ROT13_UPPER,
    _ROT13_LOWER[13:] + _ROT13_LOWER[:13] + _ROT13_UPPER[13:] + _ROT13_UPPER[:13],
)


def decrypt_rot13(encrypted):
    """Decode a ROT13 string.

    ASCII letters are rotated by 13 positions with their case preserved;
    every other character is passed through unchanged. ROT13 is its own
    inverse, so the same function also encodes.

    Args:
        encrypted: The ROT13-encoded text.

    Returns:
        The decoded text.
    """
    # One translate() pass replaces the per-character loop that rebuilt the
    # result string with "+=" on every iteration.
    return encrypted.translate(_ROT13_TABLE)
def decrypt_base64(encrypted):
    """Decode a Base64 payload to UTF-8 text.

    Missing trailing ``=`` padding is restored before decoding, so payloads
    whose padding was stripped still decode.

    Args:
        encrypted: Base64 text as ``str`` (ASCII) or ``bytes``.

    Returns:
        The decoded text.

    Raises:
        binascii.Error: If the payload is not valid Base64.
        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
    """
    pad_char = '=' if isinstance(encrypted, str) else b'='
    # Base64 works on 4-character groups; top up to the next multiple of 4.
    padded = encrypted + pad_char * (-len(encrypted) % 4)
    return base64.b64decode(padded).decode('utf-8')
def decrypt_hex_array(hex_string):
    """Decode the contents of a Java hex byte-array literal to text.

    Args:
        hex_string: Text containing ``0x..`` literals, for example the part
            between the braces of ``new byte[]{0x48, 0x65, ...}``.

    Returns:
        The bytes decoded as UTF-8. If they are not valid UTF-8, each byte
        is mapped to the code point of the same value (Latin-1) instead.
    """
    # Accept one- or two-digit literals (0xA as well as 0x0A) and both 0x and
    # 0X; the trailing \b rejects values wider than a byte such as 0x123.
    hex_values = re.findall(r'\b0[xX]([0-9a-fA-F]{1,2})\b', hex_string)
    raw = bytes(int(h, 16) for h in hex_values)
    try:
        # Decode the byte sequence as a whole so multi-byte UTF-8 characters
        # come out as one character rather than one per byte.
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        return raw.decode('latin-1')
def deobfuscate_strings(code):
    """Find encoded string calls in Java source and inline the plain text.

    Three call shapes are recognised and replaced by a Java string literal:

    * ``b.a("...")``               - ROT13
    * ``c.a(new byte[]{...})``     - hex byte array
    * ``d.a("...")``               - Base64

    Every successful decode is also reported on stdout. A ``d.a("...")``
    argument that is not decodable Base64/UTF-8 is reported and left
    untouched instead of aborting the run.

    Args:
        code: Java source text.

    Returns:
        The source text with the recognised calls replaced.
    """

    def as_java_literal(text):
        # Escape characters that would otherwise end or corrupt the literal.
        escaped = (
            text.replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\n', '\\n')
            .replace('\r', '\\r')
        )
        return f'"{escaped}"'

    def replace_rot13(match):
        encrypted = match.group(1)
        decrypted = decrypt_rot13(encrypted)
        print(f"ROT13: {encrypted} -> {decrypted}")
        return as_java_literal(decrypted)

    def replace_hex(match):
        hex_str = match.group(1)
        decrypted = decrypt_hex_array(hex_str)
        print(f"HEX: {hex_str} -> {decrypted}")
        return as_java_literal(decrypted)

    def replace_base64(match):
        encrypted = match.group(1)
        try:
            decrypted = decrypt_base64(encrypted)
        except ValueError:
            # binascii.Error and UnicodeDecodeError both derive from
            # ValueError; keep the call as-is so other strings still decode.
            print(f"BASE64: {encrypted} -> <not decodable, left unchanged>")
            return match.group(0)
        print(f"BASE64: {encrypted} -> {decrypted}")
        return as_java_literal(decrypted)

    # The leading \b keeps e.g. "ab.a(...)" from being mistaken for "b.a(...)".
    # Pattern: b.a("...")
    code = re.sub(r'\bb\.a\("([^"]+)"\)', replace_rot13, code)
    # Pattern: c.a(new byte[]{...})
    code = re.sub(r'\bc\.a\(new byte\[\]\{([^}]+)\}\)', replace_hex, code)
    # Pattern: d.a("...")
    code = re.sub(r'\bd\.a\("([^"]+)"\)', replace_base64, code)
    return code
if __name__ == '__main__':
    # Script entry point: read obfuscated.java from the current directory
    # and print the source with decoded strings to stdout.
    # The context manager closes the file even if reading fails.
    with open('obfuscated.java', encoding='utf-8') as source_file:
        obfuscated = source_file.read()
    deobfuscated = deobfuscate_strings(obfuscated)
    print(deobfuscated)
Control Flow Flattening
Flattened Code Pattern
// Original code: doubles x, then adds 5 when the doubled value exceeds 10
// and subtracts 3 otherwise. The branch structure is directly visible.
public int calculate(int x) {
    int result = x * 2;
    if (result > 10) {
        result += 5;
    } else {
        result -= 3;
    }
    return result;
}
// After control flow flattening: the same computation, but every basic block
// has become a `case` of one switch inside an endless loop. The `state`
// variable chooses which block runs next, so the original if/else structure
// is no longer visible in the code layout.
public int calculate(int x) {
    int state = 0; // dispatcher variable; 0 is the entry block
    int result = 0;
    while (true) {
        switch (state) {
            case 0: // entry: result = x * 2
                result = x * 2;
                state = 1;
                break;
            case 1: // the original condition, expressed as a choice of next state
                if (result > 10) {
                    state = 2;
                } else {
                    state = 3;
                }
                break;
            case 2: // original "then" branch
                result += 5;
                state = 4;
                break;
            case 3: // original "else" branch
                result -= 3;
                state = 4;
                break;
            case 4: // exit block: the only way out of the loop
                return result;
        }
    }
}
Deflattening Technique
#!/usr/bin/env python3
"""
Deflatten control flow obfuscation
"""
import re
def extract_state_machine(code):
    """Extract the body of each ``case`` from a flattened switch dispatcher.

    A case body runs from ``case N:`` up to the first ``break;`` (which is
    dropped) or the first ``return ...;`` statement (which is kept, so the
    exit state still carries its return).

    Args:
        code: Java source containing the flattened ``switch``.

    Returns:
        Dict mapping the integer state number to its stripped source text.
        A state number that occurs more than once keeps the last body.
    """
    case_pattern = re.compile(
        r'case\s+(\d+)\s*:\s*(.*?)(break;|\breturn\b[^;]*;)',
        re.DOTALL,
    )
    states = {}
    for match in case_pattern.finditer(code):
        state_num = int(match.group(1))
        body, terminator = match.group(2), match.group(3)
        if terminator.startswith('return'):
            # The return statement is real program logic, unlike the break
            # that only hands control back to the dispatcher loop.
            body += terminator
        states[state_num] = body.strip()
    return states
def build_cfg(states, state_var='state'):
    """Build a control flow graph from extracted state bodies.

    Args:
        states: Dict mapping state number to the state's source text, as
            returned by ``extract_state_machine``.
        state_var: Name of the dispatcher variable assigned in the bodies.

    Returns:
        Dict mapping state number to a node dict with:

        * ``'code'``       - the state's source text, unchanged
        * ``'next'``       - the first state assigned in the body, or
          ``None`` when the body assigns none
        * ``'successors'`` - every distinct state assigned in the body, in
          order of appearance (a conditional state lists both branches)
    """
    # \b stops a longer identifier that merely ends in the variable name
    # (e.g. "laststate = 1") from being read as a transition.
    assignment = re.compile(rf'\b{re.escape(state_var)}\s*=\s*(\d+)')
    cfg = {}
    for state_num, code in states.items():
        successors = []
        for target_text in assignment.findall(code):
            target = int(target_text)
            if target not in successors:
                successors.append(target)
        cfg[state_num] = {
            'code': code,
            'next': successors[0] if successors else None,
            'successors': successors,
        }
    return cfg
def reconstruct_linear_code(cfg, start_state=0, state_var='state'):
    """Rebuild straight-line code by walking the CFG from ``start_state``.

    Only each node's ``'next'`` edge is followed, so for a conditional state
    just that one branch is emitted. The walk stops at a node without a
    successor, at a state already visited (loop), or at a state that is
    missing from ``cfg``.

    Args:
        cfg: Dict as returned by ``build_cfg``; each node must provide
            ``'code'`` and ``'next'``.
        start_state: State number to start from.
        state_var: Name of the dispatcher variable whose assignments are
            stripped from the output.

    Returns:
        The visited states' code, one state per line group, joined by
        newline characters.
    """
    state_assignment = re.compile(rf'\b{re.escape(state_var)}\s*=\s*\d+\s*;')
    reconstructed = []
    visited = set()
    current = start_state
    # "current in cfg" ends the walk cleanly when a transition points at a
    # state that was never extracted, instead of raising KeyError.
    while current is not None and current in cfg and current not in visited:
        visited.add(current)
        node = cfg[current]
        # The dispatcher assignment is obfuscation scaffolding, not logic.
        reconstructed.append(state_assignment.sub('', node['code']).strip())
        current = node['next']
    # A real newline: an escaped backslash would emit the two characters
    # "\" and "n" between the fragments.
    return '\n'.join(reconstructed)
if __name__ == '__main__':
    # Script entry point: read flattened.java from the current directory,
    # recover the state machine and print the linearised code to stdout.
    # The context manager closes the file even if reading fails.
    with open('flattened.java', encoding='utf-8') as source_file:
        flattened_code = source_file.read()
    states = extract_state_machine(flattened_code)
    cfg = build_cfg(states)
    linear = reconstruct_linear_code(cfg)
    print(linear)
Native Library Packing
Packed Library Detection
# Check for packing indicators
# `file` identifies the binary format; the comment below shows sample output.
file libnative.so
# libnative.so: ELF 64-bit LSB shared object, ARM aarch64
# Check sections: list the section headers and keep only the lines that
# contain PACK, UPX or encrypt (the match is case-sensitive).
readelf -S libnative.so | grep -E "(PACK|UPX|encrypt)"
# Entropy analysis (high entropy = likely packed/encrypted)
# The inline Python reads the whole file, counts how often each byte value
# occurs and prints the Shannon entropy in bits per byte.
python3 -c "
import sys
import math
from collections import Counter
data = open('libnative.so', 'rb').read()
entropy = -sum(count/len(data) * math.log2(count/len(data))
for count in Counter(data).values())
print(f'Entropy: {entropy:.2f} bits/byte')
# >7.5 bits/byte suggests encryption/packing
"
Dynamic Unpacking
// Frida script: dump_unpacked.js
//
// Dumps native code from process memory after the packer has loaded it:
//   1. hooks the dynamic loader entry points and dumps the target library
//      once it is mapped, and
//   2. hooks mmap and dumps any newly mapped region that starts with the
//      ELF magic.
'use strict';

// Library to dump. Only loader calls whose path contains this name are
// dumped; set to null to dump every library that gets loaded.
const moduleName = "libnative.so";

// Upper bound for a raw dump taken from an mmap'd region.
const MAX_ELF_DUMP_SIZE = 0x100000; // 1MB

// Resolve a process-wide export. Newer Frida releases expose this as
// Module.findGlobalExportByName(); older ones use findExportByName(null, ...).
function findGlobalExport(name) {
  if (typeof Module.findGlobalExportByName === 'function') {
    return Module.findGlobalExportByName(name);
  }
  return Module.findExportByName(null, name);
}

// Write `buffer` (an ArrayBuffer) to `path`, always closing the file.
function dumpToFile(path, buffer) {
  const file = new File(path, 'wb');
  try {
    file.write(buffer);
  } finally {
    file.close();
  }
}

// Hook one loader function (dlopen-style: first argument is the path,
// return value is an opaque handle) and dump the target once it is loaded.
function hookLoader(symbol) {
  const address = findGlobalExport(symbol);
  if (address === null) {
    console.log(`[-] ${symbol} not found, not hooked`);
    return;
  }
  Interceptor.attach(address, {
    onEnter: function(args) {
      // Remember the path for onLeave; a NULL path means "main program".
      this.path = args[0].isNull() ? null : args[0].readCString();
      console.log(`[*] ${symbol}: ${this.path}`);
    },
    onLeave: function(retval) {
      if (retval.isNull() || this.path === null) {
        return;
      }
      if (moduleName !== null && this.path.indexOf(moduleName) === -1) {
        return;
      }
      // The return value is a loader handle, not an address inside the
      // library, so the module has to be looked up by name instead.
      const name = this.path.split('/').pop();
      const module = Process.findModuleByName(name);
      if (module === null) {
        console.log(`[-] ${name} loaded but not found in the module list`);
        return;
      }
      console.log(`[*] Loaded: ${module.name} at ${module.base}`);
      // Dump unpacked library
      const target = `/sdcard/${module.name}.unpacked`;
      try {
        const buffer = module.base.readByteArray(module.size);
        dumpToFile(target, buffer);
        console.log(`[+] Dumped to ${target}`);
      } catch (e) {
        // Part of the image may be unreadable, or the target not writable.
        console.log(`[-] Could not dump ${module.name}: ${e}`);
      }
    }
  });
}

// Hook dlopen to catch unpacking. android_dlopen_ext is hooked as well
// because Android's runtime typically loads app libraries through it.
hookLoader("dlopen");
hookLoader("android_dlopen_ext");

// Alternative: Hook mmap for runtime decryption
const mmapAddress = findGlobalExport("mmap");
if (mmapAddress === null) {
  console.log('[-] mmap not found, not hooked');
} else {
  Interceptor.attach(mmapAddress, {
    onEnter: function(args) {
      // Second argument is the mapping length; keep it so the dump never
      // reads past the end of the region.
      this.length = parseInt(args[1].toString(), 16);
    },
    onLeave: function(retval) {
      // mmap signals failure with MAP_FAILED ((void *) -1), not NULL.
      if (retval.isNull() || retval.toInt32() === -1) {
        return;
      }
      // Check if mapped region looks like ELF
      let magic;
      try {
        // readByteArray returns an ArrayBuffer; wrap it to index bytes.
        magic = new Uint8Array(retval.readByteArray(4));
      } catch (e) {
        return; // region is not readable (e.g. mapped without PROT_READ)
      }
      const elf_magic = [0x7f, 0x45, 0x4c, 0x46]; // \x7fELF
      if (!elf_magic.every((value, index) => magic[index] === value)) {
        return;
      }
      console.log(`[!] ELF mapped at ${retval}`);
      // Dump decrypted code
      const dump_size = Math.min(this.length, MAX_ELF_DUMP_SIZE);
      try {
        const buffer = retval.readByteArray(dump_size);
        dumpToFile('/sdcard/decrypted_elf.so', buffer);
        console.log('[+] Dumped decrypted ELF');
      } catch (e) {
        console.log(`[-] Could not dump ELF at ${retval}: ${e}`);
      }
    }
  });
}
Conclusion
Android obfuscation analysis requires systematic approaches to reverse symbol renaming, decrypt strings, deflatten control flow, and unpack native libraries. Combining static analysis tools (ProGuard mappings, entropy analysis) with dynamic instrumentation (Frida runtime dumping) enables comprehensive deobfuscation.
References
- Google (2023). “R8 Code Shrinker”
- ProGuard (2023). “ProGuard Manual”
- Tigress (2023). “Control Flow Obfuscation”
- Quarkslab (2020). “Android Native Library Unpacking”