本文来分析下 Matrix 对于线程的监控。
Matrix 对于线程的监控主要是 hook pthread 的以下几个方法:
pthread_create
pthread_detach
pthread_join
pthread_setname_np
原理
先来看下为什么 hook pthread 的几个方法就可以监控到线程。
一般的 java 线程写法
// Typical way to start a Java thread: wrap the work in an anonymous Runnable
// and call start() on the Thread that owns it.
new Thread(new Runnable() {
    @Override
    public void run() {
        // do action
    }
}).start();
// Excerpt of Thread.start() as quoted here; `...` marks code the excerpt omits.
// It hands thread creation to the native method nativeCreate() and reports a
// failed start to the thread group.
public synchronized void start() {
...
started = false;
try {
// Cross into native code.
nativeCreate(this, stackSize, daemon);
started = true;
} finally {
try {
// started is still false only when nativeCreate() did not return normally.
if (!started) {
group.threadStartFailed(this);
}
} catch (Throwable ignore) {
/* do nothing. If start0 threw a Throwable then
it will be passed up the call stack */
}
}
}
// Native function behind nativeCreate(): forwards to Thread::CreateNativeThread,
// converting the jboolean daemon flag into a C++ bool. `...` marks omitted code.
static void Thread_nativeCreate(JNIEnv* env, jclass, jobject java_thread, jlong stack_size,jboolean daemon) {
...
Thread::CreateNativeThread(env, java_thread, stack_size, daemon == JNI_TRUE);
}
// Creates the native Thread object for a Java thread and starts it with
// pthread_create(), using Thread::CreateCallback as the start routine.
// NOTE(review): this is an excerpt — java_name, thread_name and the
// initialisation of attr are not shown here.
void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
CHECK(java_peer != nullptr);
Thread* self = static_cast<JNIEnvExt*>(env)->GetSelf();
if (VLOG_IS_ON(threads)) {
if (java_name != nullptr) {
thread_name = java_name->ToModifiedUtf8();
} else {
thread_name = "(Unnamed)";
}
}
Runtime* runtime = Runtime::Current();
// This object is later passed as the argument of the pthread start routine.
Thread* child_thread = new Thread(is_daemon);
// Keep a global reference to the Java Thread object in the native thread.
child_thread->tlsPtr_.jpeer = env->NewGlobalRef(java_peer);
stack_size = FixStackSize(stack_size);
// Store the address of child_thread in the Java thread's nativePeer field.
env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer,
reinterpret_cast<jlong>(child_thread));
std::string error_msg;
std::unique_ptr<JNIEnvExt> child_jni_env_ext(
JNIEnvExt::Create(child_thread, Runtime::Current()->GetJavaVM(), &error_msg));
int pthread_create_result = 0;
// The pthread is only created when the child's JNIEnvExt could be created.
if (child_jni_env_ext.get() != nullptr) {
pthread_t new_pthread;
pthread_attr_t attr;
child_thread->tlsPtr_.tmp_jni_env = child_jni_env_ext.get();
// Create the pthread: the start routine is Thread::CreateCallback and its
// argument is child_thread.
pthread_create_result = pthread_create(&new_pthread,
&attr,
Thread::CreateCallback,
child_thread);
}
}
// pthread start routine installed by Thread::CreateNativeThread. arg is the
// Thread* that was passed to pthread_create(); the routine invokes the Java
// thread's run() method and then unregisters the thread.
// NOTE(review): excerpt — the declaration of soa is not shown here.
void* Thread::CreateCallback(void* arg) {
Thread* self = reinterpret_cast<Thread*>(arg);
Runtime* runtime = Runtime::Current();
{
runtime->GetRuntimeCallbacks()->ThreadStart(self);
// Invoke the 'run' method of our java.lang.Thread.
ObjPtr<mirror::Object> receiver = self->tlsPtr_.opeer;
jmethodID mid = WellKnownClasses::java_lang_Thread_run;
ScopedLocalRef<jobject> ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
// Execute the Java thread's run() method.
InvokeVirtualOrInterfaceWithJValues(soa, ref.get(), mid, nullptr);
}
// Detach and delete self.
Runtime::Current()->GetThreadList()->Unregister(self);
return nullptr;
}
由此可见,Java 线程最终也是通过 pthread_create 创建的,所以通过 hook pthread 的一系列方法就可以监控到线程。
Matrix Thread Hook
主要的 hook 逻辑在 PthreadHook.cpp 中。
- 开始 hook
// Installs the pthread hooks.
//
// Steps, in order:
//   1. Resolve the original pthread_create / pthread_setname_np /
//      pthread_detach / pthread_join pointers via FETCH_ORIGIN_FUNC.
//   2. Initialise thread tracing when sThreadTraceEnabled is set.
//   3. Between PauseLoadSo/ResumeLoadSo and xhook_block_refresh/
//      xhook_unblock_refresh, register the h_* handlers for the four symbols —
//      both through xhook_export_symtable_hook on "libc.so" and through
//      xhook_grouped_register for every library matching ".*/.*\\.so$" —
//      and then call xhook_refresh(0).
//
// enable_debug: forwarded to xhook_enable_debug as 1 or 0.
void InstallHooks(bool enable_debug) {
FETCH_ORIGIN_FUNC(pthread_create)
// The macro above expands to:
/**
* if (!orig_pthread_create) {
void *handle = dlopen("libc.so", RTLD_LAZY);
if (handle) {
orig_pthread_create = (fn_pthread_create_t)dlsym(handle, "pthread_create");
}
}
**/
// The following macros expand in the same way.
FETCH_ORIGIN_FUNC(pthread_setname_np)
FETCH_ORIGIN_FUNC(pthread_detach)
FETCH_ORIGIN_FUNC(pthread_join)
if (sThreadTraceEnabled) {
thread_trace::thread_trace_init();
}
matrix::PauseLoadSo();
xhook_block_refresh();
{
// Hooks on libc.so's own exported symbols; each return code is only logged.
int ret = xhook_export_symtable_hook("libc.so", "pthread_create",
(void *) HANDLER_FUNC_NAME(pthread_create),
nullptr);
LOGD(LOG_TAG, "export table hook sym: pthread_create, ret: %d", ret);
ret = xhook_export_symtable_hook("libc.so", "pthread_setname_np",
(void *) HANDLER_FUNC_NAME(pthread_setname_np),
nullptr);
LOGD(LOG_TAG, "export table hook sym: pthread_setname_np, ret: %d", ret);
// Grouped registrations for every path matching ".*/.*\\.so$".
// NOTE(review): the return values of xhook_grouped_register are not checked.
xhook_grouped_register(HOOK_REQUEST_GROUPID_PTHREAD, ".*/.*\\.so$", "pthread_create",
(void *) HANDLER_FUNC_NAME(pthread_create), nullptr);
xhook_grouped_register(HOOK_REQUEST_GROUPID_PTHREAD, ".*/.*\\.so$",
"pthread_setname_np",
(void *) HANDLER_FUNC_NAME(pthread_setname_np), nullptr);
xhook_grouped_register(HOOK_REQUEST_GROUPID_PTHREAD, ".*/.*\\.so$", "pthread_detach",
(void *) HANDLER_FUNC_NAME(pthread_detach), nullptr);
xhook_grouped_register(HOOK_REQUEST_GROUPID_PTHREAD, ".*/.*\\.so$", "pthread_join",
(void *) HANDLER_FUNC_NAME(pthread_join), nullptr);
ret = xhook_export_symtable_hook("libc.so", "pthread_detach",
(void *) HANDLER_FUNC_NAME(pthread_detach), nullptr);
LOGD(LOG_TAG, "export table hook sym: pthread_detach, ret: %d", ret);
ret = xhook_export_symtable_hook("libc.so", "pthread_join",
(void *) HANDLER_FUNC_NAME(pthread_join), nullptr);
LOGD(LOG_TAG, "export table hook sym: pthread_join, ret: %d", ret);
xhook_enable_debug(enable_debug ? 1 : 0);
// Passes 0 to xhook_enable_sigsegv_protection.
xhook_enable_sigsegv_protection(0);
xhook_refresh(0);
}
xhook_unblock_refresh();
matrix::ResumeLoadSo();
}
-
说明
宏定义在 matrix-commons 模块(Module)的 HookCommon.h 中。
// FETCH_ORIGIN_FUNC(pthread_create) is a macro; it has to be expanded by hand
// to see the code that actually runs.
// The macros involved are:

// Name of the replacement (hook) function for fn_name: h_<fn_name>.
#define HANDLER_FUNC_NAME(fn_name) h_##fn_name
// Name of the pointer holding the original function: orig_<fn_name>.
#define ORIGINAL_FUNC_NAME(fn_name) orig_##fn_name
#define RTLD_LAZY 0x00001
// Function-pointer typedef name for sym: fn_<sym>_t.
#define FUNC_TYPE(sym) fn_##sym##_t
// If orig_<sym> is still null, dlopen ORIGINAL_LIB and resolve sym with dlsym.
// orig_<sym> stays null when either dlopen or dlsym fails.
// NOTE(review): ORIGINAL_LIB is defined elsewhere; the hand expansion in this
// article shows it as "libc.so".
#define FETCH_ORIGIN_FUNC(sym) \
if (!ORIGINAL_FUNC_NAME(sym)) { \
void *handle = dlopen(ORIGINAL_LIB, RTLD_LAZY); \
if (handle) { \
ORIGINAL_FUNC_NAME(sym) = (FUNC_TYPE(sym))dlsym(handle, #sym); \
} \
}
// FETCH_ORIGIN_FUNC(pthread_create) after expansion:
if (!orig_pthread_create) {
void *handle = dlopen("libc.so", RTLD_LAZY);
if (handle) {
orig_pthread_create = (fn_pthread_create_t)dlsym(handle, "pthread_create");
}
}
// Declaration macro for the hook of pthread_create:
DECLARE_HOOK_ORIG(int, pthread_create, pthread_t*, pthread_attr_t const*,
pthread_hook::pthread_routine_t, void*);
// After expansion: the function-pointer type, the extern pointer to the
// original function, and the prototype of the handler.
typedef int (*fn_pthread_create_t)(pthread_t*, pthread_attr_t const*,pthread_hook::pthread_routine_t, void*);
extern fn_pthread_create_t orig_pthread_create;
int h_pthread_create(pthread_t*, pthread_attr_t const*,pthread_hook::pthread_routine_t, void*);
// Now look at this statement:
int ret = xhook_export_symtable_hook("libc.so", "pthread_create",(void *) HANDLER_FUNC_NAME(pthread_create),nullptr);
// After expansion:
int ret = xhook_export_symtable_hook("libc.so", "pthread_create",(void *) h_pthread_create,nullptr);
// h_pthread_create itself is also produced by a macro; it corresponds to:
DEFINE_HOOK_FUN(int, pthread_create,
pthread_t *pthread, pthread_attr_t const *attr,
pthread_hook::pthread_routine_t start_routine, void *args) {...}
// After expansion: the definition of the original-function pointer followed
// by the handler definition.
fn_pthread_create_t orig_pthread_create;
int h_pthread_create(pthread_t *pthread, pthread_attr_t const *attr,pthread_hook::pthread_routine_t start_routine, void *args){...}
其余的宏也如此。
- pthread_create hook
对于 pthread_create 方法,hook 后会执行 h_pthread_create 方法
// Pointer to the original pthread_create, plus the handler that runs in its
// place once the hook is installed. `...` marks code omitted by this excerpt.
//
// The handler looks up the caller with dladdr, copies the caller's attributes
// into tmpAttr, calls the original pthread_create (with a wrapped start routine
// when thread tracing is enabled) and, on success, records the new thread via
// thread_trace::handle_pthread_create. It returns the original call's result.
fn_pthread_create_t orig_pthread_create;
int h_pthread_create(pthread_t *pthread, pthread_attr_t const *attr,pthread_hook::pthread_routine_t start_routine, void *args){
Dl_info callerInfo = {};
bool callerInfoOk = true;
// Resolve which library the call came from, using the return address.
if (dladdr(__builtin_return_address(0), &callerInfo) == 0) {
LOGE(LOG_TAG, "%d >> Fail to get caller info.", ::getpid());
callerInfoOk = false;
}
pthread_attr_t tmpAttr;
if (LIKELY(attr == nullptr)) {
...
} else {
tmpAttr = *attr;
}
int ret = 0;
if (sThreadTraceEnabled) {
auto *routine_wrapper = thread_trace::wrap_pthread_routine(start_routine, args);
// Call the original function.
CALL_ORIGIN_FUNC_RET(int, tmpRet, pthread_create, pthread, &tmpAttr,
routine_wrapper->wrapped_func,
routine_wrapper);
/**
* The macro above expands to:
*if(!orig_pthread_create){
* void *handle = dlopen("libc.so", 0x00001);
* if (handle) {
* orig_pthread_create = (fn_pthread_create_t)dlsym(handle, "pthread_create");
* }
*}
* int tmpRet = orig_pthread_create(pthread, &tmpAttr,routine_wrapper->wrapped_func,routine_wrapper);
**/
ret = tmpRet;
} else {
...
}
// Record information about the newly created thread.
if (LIKELY(ret == 0) && sThreadTraceEnabled) {
thread_trace::handle_pthread_create(*pthread);
}
// tmpAttr is destroyed only when the caller passed no attributes.
// NOTE(review): presumably the omitted branch above initialised it — confirm.
if (LIKELY(attr == nullptr)) {
pthread_attr_destroy(&tmpAttr);
}
return ret;
}
// notice: this function is called back on the parent (creating) thread.
/**
 * Records a thread that was just created through the hooked pthread_create.
 *
 * When quicken unwinding is off, a 1024-byte buffer is allocated for the Java
 * stack trace, filled (if the Java-stacktrace mutex can be taken within 100 ms)
 * and handed to on_pthread_create_locked. The buffer is freed here only when
 * the thread was not recorded. `...` marks code omitted by this excerpt.
 *
 * @param pthread handle of the newly created thread
 */
void thread_trace::handle_pthread_create(const pthread_t pthread) {
    const char *arch =
#ifdef __aarch64__
            "aarch64";
#elif defined __arm__
            "arm";
#else
            // Keep the declaration well-formed on other architectures.
            "unknown";
#endif
    LOGD(TAG, "+++++++ on_pthread_create, %s", arch);
    pid_t tid = pthread_gettid_np(pthread);
    ...
    if (!m_quicken_unwind) {
        const size_t BUF_SIZE = 1024;
        char *java_stacktrace = static_cast<char *>(malloc(BUF_SIZE));
        // malloc may fail: only touch the buffer after checking it.
        if (java_stacktrace) {
            strncpy(java_stacktrace, "(init stacktrace)", BUF_SIZE);
        }
        if (m_java_stacktrace_mutex.try_lock_for(std::chrono::milliseconds(100))) {
            if (java_stacktrace) {
                // Fetch the Java stack trace.
                get_java_stacktrace(java_stacktrace, BUF_SIZE);
            }
            m_java_stacktrace_mutex.unlock();
        } else {
            LOGE(TAG, "maybe reentrant!");
        }
        LOGD(TAG, "parent_tid: %d -> tid: %d", pthread_gettid_np(pthread_self()), tid);
        bool recorded = on_pthread_create_locked(pthread, java_stacktrace, false, tid);
        // The buffer is released here only when the thread was not recorded.
        if (!recorded && java_stacktrace) {
            free(java_stacktrace);
        }
    } else {
        ...
    }
    // NOTE(review): rp_release / notify_routine are defined elsewhere;
    // presumably they let the wrapped start routine of the child proceed — confirm.
    rp_release();
    notify_routine(pthread);
    LOGD(TAG, "------ on_pthread_create end");
}
//static std::map<pthread_t, pthread_meta_t> m_pthread_metas;
// Records a newly created thread in m_pthread_metas while holding
// m_pthread_meta_mutex.
//
// pthread:         handle of the new thread (map key)
// java_stacktrace: heap buffer with the Java stack trace, may be null; it is
//                  stored into the meta when the thread is recorded
// quicken_unwind:  selects quicken_based_unwind instead of unwind_adapter
// tid:             kernel thread id of the new thread
// Returns false when pthread is already present in the map, true otherwise.
static inline bool
on_pthread_create_locked(const pthread_t pthread, char *java_stacktrace, bool quicken_unwind,
pid_t tid) {
pthread_meta_lock meta_lock(m_pthread_meta_mutex);
// always false
if (m_pthread_metas.count(pthread)) {
LOGD(TAG, "on_pthread_create: thread already recorded");
return false;
}
// Look up the pthread_meta_t stored under key=pthread; operator[] creates one
// and inserts it into m_pthread_metas when there is none yet.
pthread_meta_t &meta = m_pthread_metas[pthread];
meta.tid = tid;
// If setname has not happened yet, the name read here is the parent thread's
// name; setname gets one chance to correct it, otherwise the parent's name is
// inherited.
// If setname has already happened, the name read here is the new thread's own.
// NOTE(review): the malloc result is not checked before it is written to.
meta.thread_name = static_cast<char *>(malloc(sizeof(char) * THREAD_NAME_LEN));
// Fall back to "tid-<tid>" when the name cannot be read.
if (0 != pthread_getname_ext(pthread, meta.thread_name, THREAD_NAME_LEN)) {
char temp_name[THREAD_NAME_LEN];
snprintf(temp_name, THREAD_NAME_LEN, "tid-%d", pthread_gettid_np(pthread));
strncpy(meta.thread_name, temp_name, THREAD_NAME_LEN);
}
LOGD(TAG, "on_pthread_create: pthread = %ld, thread name: %s, %llu", pthread, meta.thread_name,
(uint64_t) tid);
// Match the thread name against the filter regex (".*" according to the
// article); on success add the thread to the filtered set.
if (test_match_thread_name(meta)) {
m_filtered_pthreads.insert(pthread);
}
uint64_t native_hash = 0;
uint64_t java_hash = 0;
// Capture the native stack with wechat_backtrace.
if (quicken_unwind) {
meta.unwind_mode = wechat_backtrace::Quicken;
wechat_backtrace::quicken_based_unwind(meta.native_backtrace.frames.get(),
meta.native_backtrace.max_frames,
meta.native_backtrace.frame_size);
} else {
meta.unwind_mode = wechat_backtrace::get_backtrace_mode();
wechat_backtrace::unwind_adapter(meta.native_backtrace.frames.get(),
meta.native_backtrace.max_frames,
meta.native_backtrace.frame_size);
}
native_hash = hash_backtrace_frames(&(meta.native_backtrace));
if (java_stacktrace) {
meta.java_stacktrace.store(java_stacktrace);
java_hash = hash_str(java_stacktrace);
LOGD(TAG, "on_pthread_create: java hash = %llu", (wechat_backtrace::ullint_t) java_hash);
}
LOGD(TAG, "on_pthread_create: pthread = %ld, thread name: %s end.", pthread, meta.thread_name);
// Combine both hashes; meta.hash is left untouched when both are zero.
if (native_hash || java_hash) {
meta.hash = hash_combine(native_hash, java_hash);
}
return true;
}
总的来说有两点:一是在 m_pthread_metas 这个 map 中记录以 pthread_t 为 key、pthread_meta_t 为 value 的信息,pthread_meta_t 可记录 java/native 堆栈、tid、thread_name 及 hash 信息;二是将线程名(meta.thread_name)符合正则表达式(".*")的 pthread 记录到 m_filtered_pthreads(set)中。
- pthread_setname_np hook
// Registration of the pthread_setname_np hook (the same statement appears in
// InstallHooks).
ret = xhook_export_symtable_hook("libc.so", "pthread_setname_np",
(void *) HANDLER_FUNC_NAME(pthread_setname_np),//h_pthread_setname_np
nullptr);
// Handler that replaces pthread_setname_np: calls the original function and,
// when it succeeded and thread tracing is enabled, forwards the new name to
// thread_trace::handle_pthread_setname_np. Returns the original call's result.
DEFINE_HOOK_FUN(int, pthread_setname_np, pthread_t pthread, const char *name) {
CALL_ORIGIN_FUNC_RET(int, ret, pthread_setname_np, pthread, name);
if (LIKELY(ret == 0) && sThreadTraceEnabled) {
thread_trace::handle_pthread_setname_np(pthread, name);
}
return ret;
}
/**
* Hand-expanded form of the handler above (sets the thread's name):
fn_pthread_setname_np_t orig_pthread_setname_np;
int h_pthread_setname_np(pthread_t pthread, const char *name){
if (!orig_pthread_setname_np) {
void *handle = dlopen("libc.so", 0x01);
if (handle) {
orig_pthread_setname_np = (fn_pthread_setname_np_t)dlsym(handle, "pthread_setname_np");
}
}
int ret = orig_pthread_setname_np(pthread, name);
if (LIKELY(ret == 0) && sThreadTraceEnabled) {
thread_trace::handle_pthread_setname_np(pthread, name);
}
return ret;
}
**/
/**
 * Applies a thread rename to the recorded meta and re-evaluates the name filter.
 *
 * ~~handle_pthread_setname_np may run before handle_pthread_create~~
 * Since the cond was added, this always runs after on_pthread_create.
 *
 * @param pthread thread whose name was changed
 * @param name    the new name; null, empty or too long names are ignored
 */
void thread_trace::handle_pthread_setname_np(pthread_t pthread, const char *name) {
    if (name == NULL) {
        LOGE(TAG, "setting name null");
        return;
    }

    const size_t new_name_len = strlen(name);
    const bool length_ok = new_name_len != 0 && new_name_len < THREAD_NAME_LEN;
    if (!length_ok) {
        LOGE(TAG, "pthread name is illegal, just ignore. len(%s)", name);
        return;
    }

    LOGD(TAG, "++++++++ pre handle_pthread_setname_np tid: %d, %s", pthread_gettid_np(pthread),
         name);

    {
        pthread_meta_lock meta_lock(m_pthread_meta_mutex);

        auto recorded = m_pthread_metas.find(pthread);
        if (recorded == m_pthread_metas.end()) { // always false
            // Reaching here means on_pthread_create was never called back;
            // setname is visible to on_pthread_create.
            char *current_name = static_cast<char *>(malloc(sizeof(char) * THREAD_NAME_LEN));
            // Read the name the pthread currently carries.
            pthread_getname_ext(pthread, current_name, THREAD_NAME_LEN);
            LOGE(TAG,
                 "handle_pthread_setname_np: pthread hook lost: {%s} -> {%s}, maybe on_create has not been called",
                 current_name, name);
            free(current_name);
            return;
        }

        // on_pthread_create has already run: fix up the name and re-check
        // whether the new name matches the regex.
        pthread_meta_t &meta = recorded->second;
        strncpy(meta.thread_name, name, THREAD_NAME_LEN);

        const bool was_filtered = m_filtered_pthreads.find(pthread) != m_filtered_pthreads.end();

        // New name does not match but the inherited (parent) name did:
        // drop the thread from the filter set.
        if (was_filtered && !test_match_thread_name(meta)) {
            m_filtered_pthreads.erase(pthread);
            LOGD(TAG, "--------------------------");
            return;
        }

        // New name matches but the inherited (parent) name did not:
        // add the thread to the filter set.
        if (!was_filtered && test_match_thread_name(meta)) {
            m_filtered_pthreads.insert(pthread);
            LOGD(TAG, "--------------------------");
            return;
        }
    }

    // Otherwise nothing changes (both match, or neither matches).
    LOGD(TAG, "--------------------------");
}
找到了 m_pthread_metas 中的 pthread_meta_t,对其修改了名字,最后根据新改的名字是否匹配正则(".*") 和父线程名是否匹配,对 m_filtered_pthreads 进行了删除和增加。
- pthread_detach 和 pthread_join hook
// Handler that replaces pthread_detach: calls the original function, logs its
// result and, when it succeeded and thread tracing is enabled, reports the
// thread to thread_trace::handle_pthread_release. Returns the original result.
DEFINE_HOOK_FUN(int, pthread_detach, pthread_t pthread) {
CALL_ORIGIN_FUNC_RET(int, ret, pthread_detach, pthread);
LOGD(LOG_TAG, "pthread_detach : %d", ret);
if (LIKELY(ret == 0) && sThreadTraceEnabled) {
thread_trace::handle_pthread_release(pthread);
}
return ret;
}
// Handler that replaces pthread_join: calls the original function, logs its
// result and, when it succeeded and thread tracing is enabled, reports the
// thread to thread_trace::handle_pthread_release. Returns the original result.
DEFINE_HOOK_FUN(int, pthread_join, pthread_t pthread, void **return_value_ptr) {
CALL_ORIGIN_FUNC_RET(int, ret, pthread_join, pthread, return_value_ptr);
LOGD(LOG_TAG, "pthread_join : %d", ret);
if (LIKELY(ret == 0) && sThreadTraceEnabled) {
thread_trace::handle_pthread_release(pthread);
}
return ret;
}
/**
 * Entry point used by the pthread_detach / pthread_join hooks.
 * Forwards to on_pthread_release unless release tracing is switched off
 * through m_trace_pthread_release.
 *
 * @param pthread thread that was detached or joined
 */
void thread_trace::handle_pthread_release(pthread_t pthread) {
    LOGD(TAG, "handle_pthread_release");
    if (m_trace_pthread_release) {
        on_pthread_release(pthread);
    } else {
        LOGD(TAG, "handle_pthread_release disabled");
    }
}
/**
 * Removes the record of a released thread from m_pthread_metas while holding
 * m_pthread_meta_mutex. Does nothing besides logging when the thread is unknown.
 *
 * @param pthread thread that was detached or joined
 */
static void on_pthread_release(pthread_t pthread) {
    LOGD(TAG, "on_pthread_release");

    pthread_meta_lock meta_lock(m_pthread_meta_mutex);

    auto entry = m_pthread_metas.find(pthread);
    if (entry == m_pthread_metas.end()) {
        LOGD(TAG, "on_pthread_exit: thread not found");
        return;
    }

    erase_meta(m_pthread_metas, pthread, entry->second);
    LOGD(TAG, "on_pthread_release end");
}
static void erase_meta(std::map<pthread_t, pthread_meta_t> &metas, pthread_t &pthread, pthread_meta_t &meta) {
free(meta.thread_name);
char *java_stacktrace = meta.java_stacktrace.load(std::memory_order_acquire);
if (java_stacktrace) {
free(java_stacktrace);
}
m_pthread_metas.erase(pthread);
}
在 m_pthread_metas 删除了 key=pthread 的数据。
dump pthread json_file
static inline void pthread_dump_json_impl(FILE *log_file) {//log_file 需要记录到该文件中
LOGD(TAG, "pthread dump waiting count: %zu", m_pthread_routine_flags.size());
std::map<uint64_t, std::vector<pthread_meta_t>> pthread_metas_not_exited;
for (auto &i : m_filtered_pthreads) {
auto &meta = m_pthread_metas[i];
if (meta.hash) {
auto &hash_bucket = pthread_metas_not_exited[meta.hash];
hash_bucket.emplace_back(meta);
}
}
std::map<uint64_t, std::vector<pthread_meta_t>> pthread_metas_not_released;
for (auto &i : m_pthread_metas) {
auto &meta = i.second;
if (!meta.exited) { //一般为 false
continue;
}
if (meta.hash) {
auto &hash_bucket = pthread_metas_not_released[meta.hash];
hash_bucket.emplace_back(meta);
}
}
char *json_str = NULL;
cJSON *json_array_threads_not_exited = NULL;
cJSON *json_array_threads_not_released = NULL;
bool success = false;
cJSON *json_obj = cJSON_CreateObject();
if (!json_obj) {
goto err;
}
json_array_threads_not_exited = cJSON_AddArrayToObject(json_obj, "PthreadHook_not_exited");
json_array_threads_not_released = cJSON_AddArrayToObject(json_obj, "PthreadHook_not_released");
if (!json_array_threads_not_exited || !json_array_threads_not_released) {
goto err;
}
// 将 pthread_metas_not_exited 的内容加入到 json_array_threads_not_exited 中
success &= append_meta_2_json_array(json_array_threads_not_exited, pthread_metas_not_exited);
success &= append_meta_2_json_array(json_array_threads_not_released, pthread_metas_not_released);
json_str = cJSON_PrintUnformatted(json_obj);
cJSON_Delete(json_obj);
flogger0(log_file, "%s", json_str);
cJSON_free(json_str);
return;
err:
LOGD(TAG, "ERROR: create cJSON object failed");
cJSON_Delete(json_obj);
return;
}
将 m_filtered_pthreads 中记录的数据和 m_pthread_metas 中 meta.exited=true 的数据输出到一个 json_log 文件中。文件内容类似于:
{
"PthreadHook_not_exited":
[
{
"hash": "614404817278743815",
"native": "#pc cda00 (null) (/data/app/com.tencent.matrix.test.memoryhook-ns8hChwbmXvfAOKF9wkrHw==/lib/arm64/libwechatbacktrace.so);",
"java": "dalvik.system.VMStack.getThreadStackTrace(Native Method);com.tencent.matrix.hook.HookManager.getStack(HookManager.java:163);",
"count": "2",
"threads":
[
{
"tid": "6673",
"name": "Test"
},
{
"tid": "5962",
"name": "Test"
}
]
}
],
"PthreadHook_not_released":
[]
}