- java字节码作为中间表示(使用javap -v命令查看,-v选项才会输出常量池、栈深度等详细信息)
Classfile /home/jiansun/mydemo/sootdemo/Add.class
Last modified 2019-8-7; size 488 bytes
MD5 checksum 74331c11abb06b528003d93bae6e9d83
Compiled from "Add.java"
public class Add
minor version: 0
major version: 52
flags: ACC_PUBLIC, ACC_SUPER
Constant pool:
#1 = Methodref #6.#17 // java/lang/Object."<init>":()V
#2 = Methodref #5.#18 // Add.addTwoNum:(II)I
#3 = Fieldref #19.#20 // java/lang/System.out:Ljava/io/PrintStream;
#4 = Methodref #21.#22 // java/io/PrintStream.println:(I)V
#5 = Class #23 // Add
#6 = Class #24 // java/lang/Object
#7 = Utf8 <init>
#8 = Utf8 ()V
#9 = Utf8 Code
#10 = Utf8 LineNumberTable
#11 = Utf8 main
#12 = Utf8 ([Ljava/lang/String;)V
#13 = Utf8 addTwoNum
#14 = Utf8 (II)I
#15 = Utf8 SourceFile
#16 = Utf8 Add.java
#17 = NameAndType #7:#8 // "<init>":()V
#18 = NameAndType #13:#14 // addTwoNum:(II)I
#19 = Class #25 // java/lang/System
#20 = NameAndType #26:#27 // out:Ljava/io/PrintStream;
#21 = Class #28 // java/io/PrintStream
#22 = NameAndType #29:#30 // println:(I)V
#23 = Utf8 Add
#24 = Utf8 java/lang/Object
#25 = Utf8 java/lang/System
#26 = Utf8 out
#27 = Utf8 Ljava/io/PrintStream;
#28 = Utf8 java/io/PrintStream
#29 = Utf8 println
#30 = Utf8 (I)V
{
public Add();
descriptor: ()V
flags: ACC_PUBLIC
Code:
stack=1, locals=1, args_size=1
0: aload_0
1: invokespecial #1 // Method java/lang/Object."<init>":()V
4: return
LineNumberTable:
line 1: 0
public static void main(java.lang.String[]);
descriptor: ([Ljava/lang/String;)V
flags: ACC_PUBLIC, ACC_STATIC
Code:
stack=2, locals=4, args_size=1
0: bipush 13
2: istore_2
3: bipush 17
5: istore_3
6: iload_2
7: iload_3
8: invokestatic #2 // Method addTwoNum:(II)I
11: istore_1
12: getstatic #3 // Field java/lang/System.out:Ljava/io/PrintStream;
15: iload_1
16: invokevirtual #4 // Method java/io/PrintStream.println:(I)V
19: return
LineNumberTable:
line 4: 0
line 5: 3
line 6: 6
line 7: 12
line 8: 19
public static int addTwoNum(int, int);
descriptor: (II)I
flags: ACC_PUBLIC, ACC_STATIC
Code:
stack=2, locals=4, args_size=2
0: bipush 7
2: istore_2
3: bipush 8
5: istore_3
6: iload_0
7: iload_1
8: iadd
9: iconst_2
10: imul
11: ireturn
LineNumberTable:
line 11: 0
line 12: 3
line 13: 6
}
SourceFile: "Add.java"
- baf中间表示
- 把200多种不同的字节码精简为60种
- 没有常量池,直接通过函数名和字段名访问方法和字段
- 使用word/dword显式定义局部变量(32/64位)
- 无需过多关注变量类型,例如字节码中针对int、long、float、double的不同相加指令(iadd、ladd、fadd、dadd),在baf中统一为add指令,仅通过后缀区分类型(如add.i)
public class Add extends java.lang.Object
{
public void <init>()
{
word r0;
r0 := @this: Add;
load.r r0;
specialinvoke <java.lang.Object: void <init>()>;
return;
}
public static void main(java.lang.String[])
{
word r0;
r0 := @parameter0: java.lang.String[];
push 13;
push 17;
staticinvoke <Add: int addTwoNum(int,int)>;
store.i r0;
staticget <java.lang.System: java.io.PrintStream out>;
load.i r0;
virtualinvoke <java.io.PrintStream: void println(int)>;
return;
}
public static int addTwoNum(int, int)
{
word i0, i1, b2;
i0 := @parameter0: int;
i1 := @parameter1: int;
push 7;
store.b b2;
push 8;
store.b b2;
load.i i0;
load.i i1;
add.i;
push 2;
mul.i;
return.i;
}
}
- jimple作为中间表示
soot中Jimple语句类型
负责过程内控制流的语句:IfStmt , GotoStmt , TableSwitchStmt , LookupSwitchStmt
负责过程间的控制流语句:InvokeStmt , ReturnStmt , ReturnVoidStmt.
监控语句:EnterMonitorStmt , ExitMonitorStmt.
ThrowStmt , RetStmt
jimple是类型化的、三地址的、基于语句的中间代码
Java字节码有大约200个不同的字节码指令,BAF大约有60个,而JIMPLE有15个。
因为其紧凑性,利于分析和优化。
有类型和命名的局部变量可以改进分析
jimple的运算符是无类型的,而baf的运算符是有类型的(add.i,add.f,add.d或add.l)。从baf转换成jimple时,不再需要有类型的运算符(因为jimple的局部变量本身带有类型)。
Map m = new HashMap();
m.get("key");
//becomes the following JIMPLE code:
java.util.HashMap $r1, r2;
$r1 = new java.util.HashMap;
specialinvoke $r1.<java.util.HashMap: void <init>()>();
r2 = $r1;
//这里知道了r2的类型是hashmap,如果不知道的话,interfaceinvoke可以映射到实现Map接口的任何方法。
interfaceinvoke r2.<java.util.Map:
java.lang.Object get(java.lang.Object)>("key");
- 局部变量都在方法的顶部声明,包括引用数据类型和基本数据类型。
- identity语句定义了使用特殊值预加载的局部变量,如this或方法参数
- jimple类似于简单的Java代码(所以叫做jimple)
-
赋值语句占主导地位
jimple代码优化之后,可以重新转化成字节码文件,在Java虚拟机中运行。
public class Add extends java.lang.Object
{
public void <init>()
{
Add r0;
r0 := @this: Add;
specialinvoke r0.<java.lang.Object: void <init>()>();
return;
}
public static void main(java.lang.String[])
{
java.lang.String[] r0;
byte b0, b1;
int i2;
java.io.PrintStream $r1;
r0 := @parameter0: java.lang.String[];
b0 = 13;
b1 = 17;
i2 = staticinvoke <Add: int addTwoNum(int,int)>(b0, b1);
$r1 = <java.lang.System: java.io.PrintStream out>;
virtualinvoke $r1.<java.io.PrintStream: void println(int)>(i2);
return;
}
public static int addTwoNum(int, int)
{
int i0, i1, $i4, $i5;
byte b2, b3;
i0 := @parameter0: int;
i1 := @parameter1: int;
b2 = 7;
b3 = 8;
$i4 = i0 + i1;
$i5 = $i4 * 2;
return $i5;
}
}
- Grimp是更易阅读的中间表示(更像是反编译的java代码,grimp作为反编译器的基础)
grimp允许使用new操作,因此更接近java源代码
public class Add extends java.lang.Object
{
public void <init>()
{
Add r0;
r0 := @this: Add;
specialinvoke r0.<java.lang.Object: void <init>()>();
return;
}
public static void main(java.lang.String[])
{
java.lang.String[] r0;
byte b0, b1;
int i2;
java.io.PrintStream $r1;
r0 := @parameter0: java.lang.String[];
b0 = 13;
b1 = 17;
i2 = staticinvoke <Add: int addTwoNum(int,int)>(b0, b1);
$r1 = <java.lang.System: java.io.PrintStream out>;
virtualinvoke $r1.<java.io.PrintStream: void println(int)>(i2);
return;
}
public static int addTwoNum(int, int)
{
int i0, i1, $i4, $i5;
byte b2, b3;
i0 := @parameter0: int;
i1 := @parameter1: int;
b2 = 7;
b3 = 8;
$i4 = i0 + i1;
$i5 = $i4 * 2;
return $i5;
}
}