0%

xCrash 实现简析

xCrash 是爱奇艺最近开源在 Github 的 Crash 捕获工具(https://github.com/iqiyi/xCrash);

xCrash 是一个安卓 APP 的崩溃捕获库,它支持捕获 Native 崩溃和 Java 异常;

xCrash 能在 App 进程崩溃时,在你指定的目录中生成一个 tombstone 文件(格式与安卓系统的 tombstone 文件类似),并且,不需要 root 权限或任何系统权限;

本文将基于其在 Github 上开源的代码,简要分析它是如何实现这些强大功能的。

初始化

Java Handler 和 Native Handler

1
2
3
4
5
6
7
8
9
10
//init java crash handler
if (params.enableJavaCrashHandler) {
JavaCrashHandler.getInstance().initialize(...);
}

//init native crash handler
int r = Errno.OK;
if (params.enableNativeCrashHandler) {
r = NativeCrashHandler.getInstance().initialize(...);
}

Java 层面

Java 层主要是增加 DefaultHandler

1
2
3
4
5
6
7
8
// 取出之前的 Handler
defaultHandler = Thread.getDefaultUncaughtExceptionHandler();

try {
Thread.setDefaultUncaughtExceptionHandler(this);
} catch (Exception e) {
XCrash.getLogger().e(Util.TAG, "JavaCrashHandler setDefaultUncaughtExceptionHandler failed", e);
}

处理时将崩溃信息写入文件,并交给上级 Handler 处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Handles a throwable that was not caught by the thread that raised it.
 * <p>
 * The throwable is first passed to {@code handleException}; any {@link Exception} thrown
 * while doing so is logged and otherwise ignored, so it cannot prevent the hand-off below.
 * Afterwards the crash is either forwarded to the previously installed default handler
 * or the process is killed.
 *
 * @param thread    the thread that is terminating
 * @param throwable the uncaught throwable
 */
@Override
public void uncaughtException(Thread thread, Throwable throwable) {
try {
handleException(thread, throwable);
} catch (Exception e) {
XCrash.getLogger().e(Util.TAG, "JavaCrashHandler handleException failed", e);
}

// Delegate to the previously installed handler when rethrow is enabled and such a
// handler exists; otherwise kill the process identified by this.pid.
if (this.rethrow && defaultHandler != null) {
defaultHandler.uncaughtException(thread, throwable);
} else {
android.os.Process.killProcess(this.pid);
}
}

handleException 主要打印 Java 的崩溃信息

(1)设备状态信息以及堆栈

设备信息主要通过读系统文件实现;

设备信息

堆栈直接打印就 ok

1
2
PrintWriter pw = new PrintWriter(sw);
throwable.printStackTrace(pw);

Util 类中有些工具方法值得参考和学习,比如 root 的判断、/proc/meminfo 内存信息的获取、线程数和进程数的获取

(2)Logcat 信息

通过 shell 命令实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
List<String> command = new ArrayList<String>();
command.add("/system/bin/logcat");
command.add("-b");
command.add(bufferName);
command.add("-d");
command.add("-v");
command.add("threadtime");
command.add("-t");
command.add(Integer.toString(withPid ? lines : (int) (lines * 1.2)));
if (withPid) {
command.add("--pid");
command.add(pidString);
}
command.add("*:" + priority);

(3)内存信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
Debug.MemoryInfo mi = new Debug.MemoryInfo();
// Android SDK 的 API
// android.os.Debug#getMemoryInfo(android.os.Debug.MemoryInfo)
Debug.getMemoryInfo(mi);

if (Build.VERSION.SDK_INT >= 23) {
sb.append(String.format(Locale.US, memInfoFmt, "Java Heap:", mi.getMemoryStat("summary.java-heap")));
sb.append(String.format(Locale.US, memInfoFmt, "Native Heap:", mi.getMemoryStat("summary.native-heap")));
sb.append(String.format(Locale.US, memInfoFmt, "Code:", mi.getMemoryStat("summary.code")));
sb.append(String.format(Locale.US, memInfoFmt, "Stack:", mi.getMemoryStat("summary.stack")));
sb.append(String.format(Locale.US, memInfoFmt, "Graphics:", mi.getMemoryStat("summary.graphics")));
sb.append(String.format(Locale.US, memInfoFmt, "Private Other:", mi.getMemoryStat("summary.private-other")));
sb.append(String.format(Locale.US, memInfoFmt, "System:", mi.getMemoryStat("summary.system")));
sb.append(String.format(Locale.US, memInfoFmt2, "TOTAL:", mi.getMemoryStat("summary.total-pss"), "TOTAL SWAP:", mi.getMemoryStat("summary.total-swap")));
}

(4)其它线程的堆栈

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// 获取其它线程的栈
Map<Thread, StackTraceElement[]> map = Thread.getAllStackTraces();
for (Map.Entry<Thread, StackTraceElement[]> entry : map.entrySet()) {
Thread thd = entry.getKey();
StackTraceElement[] stacktrace = entry.getValue();
...
sb.append("pid: ").append(pid).append(", tid: ").append(thd.getId()).append(", name: ").append(thd.getName()).append(" >>> ").append(processName).append(" <<<\n");
sb.append("\n");
sb.append("java stacktrace:\n");
for (StackTraceElement element : stacktrace) {
sb.append(" at ").append(element.toString()).append("\n");
}
sb.append("\n");

thdDumped++;
}

Native 层面

Native 的初始化

(1)加载 so 库

1
2
3
4
5
6
7
8
9
10
11
12
13
// Load the native library by name first.
try {
    System.loadLibrary("xcrash");
} catch (Throwable e) {
    XCrash.getLogger().e(Util.TAG, "NativeCrashHandler System.loadLibrary failed", e);
    try {
        // Fallback for some unusual Android versions: load the library by absolute path.
        System.load(ctx.getFilesDir().getParent() + "/lib/libxcrash.so");
    } catch (Throwable e2) {
        // Report the failure of the fallback itself (e2); the loadLibrary failure (e)
        // has already been logged above.
        XCrash.getLogger().e(Util.TAG, "NativeCrashHandler System.load failed", e2);
        return Errno.LOAD_LIBRARY_FAILED;
    }
}

(2)调用 Native 方法

1
private static native int initEx(...);

来看 Native 层

首先注册了 JNI 方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
static JNINativeMethod xc_jni_methods[] = {
...
{
"initEx",
"("
"Landroid/content/Context;"
"Z"
...
"I",
(void *)xc_jni_init_ex
},
...
};
// 注册
if((*env)->RegisterNatives(env, clazz, xc_jni_methods, sizeof(xc_jni_methods) / sizeof(xc_jni_methods[0]))) return -1;

初始化方法中对信号 Handler 进行注册

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// Installs `handler` for every signal listed in xcc_signal_info and sets up an alternate
// signal stack for it to run on.
//
// handler: the signal handler to install; the caller passes
//          static void xc_core_signal_handler(int sig, siginfo_t *si, void *uc).
//
// Returns 0 on success, XCC_ERRNO_NOMEM if the alternate stack cannot be allocated, or
// XCC_ERRNO_SYS if sigaltstack() or any sigaction() call fails.
int xcc_signal_register(xcc_signal_handler_t handler)
{
    // Allocate and register the alternate stack; SA_ONSTACK below makes the handler use it.
    stack_t ss;
    if(NULL == (ss.ss_sp = malloc(XCC_SIGNAL_STACK_SIZE))) return XCC_ERRNO_NOMEM;
    ss.ss_size  = XCC_SIGNAL_STACK_SIZE;
    ss.ss_flags = 0;
    if(0 != sigaltstack(&ss, NULL))
    {
        // The stack was not registered, so nothing else references it: release it
        // instead of leaking it.
        free(ss.ss_sp);
        return XCC_ERRNO_SYS;
    }

    // Build the sigaction to install: all signals are blocked while the handler runs
    // (sigfillset), and the handler receives the extended siginfo/ucontext arguments.
    struct sigaction act;
    memset(&act, 0, sizeof(act));
    sigfillset(&act.sa_mask);
    act.sa_sigaction = handler;
    act.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;

    // sigaction() is the API for installing a signal handler; the previous disposition
    // of each signal is saved in xcc_signal_info[i].orig_act.
    size_t i;
    for(i = 0; i < sizeof(xcc_signal_info) / sizeof(xcc_signal_info[0]); i++)
    {
        if(0 != sigaction(xcc_signal_info[i].signum, &act, &(xcc_signal_info[i].orig_act)))
            return XCC_ERRNO_SYS;
    }

    return 0;
}

来看对异常信号捕捉的处理

(1)文件的创建

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
if((xc_core_log_fd = xc_recorder_create_and_open(xc_core_recorder)) < 0) goto end;

// create new file
self->if_create_new_file = 1;

if((fd = XCC_UTIL_TEMP_FAILURE_RETRY(open(self->log_pathname, new_file_flags, 0644))) >= 0) return fd;

if(self->prepared_fd >= 0)
{
close(self->prepared_fd);
self->prepared_fd = -1;
if((fd = XCC_UTIL_TEMP_FAILURE_RETRY(open(self->log_pathname, new_file_flags, 0644))) >= 0) return fd;
}

return -1;

(2)配置程序允许 dump

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
//set dumpable
orig_dumpable = prctl(PR_GET_DUMPABLE);
errno = 0;
if(0 != prctl(PR_SET_DUMPABLE, 1))
{
xcc_util_write_format_safe(xc_core_log_fd, XC_CORE_ERR_TITLE"set dumpable failed, errno=%d\n\n", errno);
goto end;
}
restore_orig_dumpable = 1;

//set traceable (disable the ptrace restrictions introduced by Yama)
//https://www.kernel.org/doc/Documentation/security/Yama.txt
errno = 0;
if(0 != prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY))
...

(3)子进程进行 dump 收集

1
2
3
4
pid_t dumper_pid = xc_core_fork(xc_core_exec_dumper);

// 父进程等待
int r = XCC_UTIL_TEMP_FAILURE_RETRY(waitpid(dumper_pid, &status, __WALL));

有个重试的小技巧,宏定义

1
2
3
4
5
6
7
8
9
10
11
12
13
14

// Evaluates `exp` (typically a system call) and evaluates it again for as long as it
// yields -1 with errno == EINTR. The macro's value is the final value of `exp`.
// errno is reset to 0 before every attempt.
// Uses the GNU C statement-expression ({ ... }) and __typeof__ extensions.
#define XCC_UTIL_TEMP_FAILURE_RETRY(exp) ({         \
            __typeof__(exp) _rc;                    \
            do {                                    \
                errno = 0;                          \
                _rc = (exp);                        \
            } while (_rc == -1 && errno == EINTR);  \
            _rc; })

dump 的逻辑在 static int xc_core_exec_dumper(void *arg) 函数中

将标准输出和标准错误重定向到 /dev/null 设备

1
2
XCC_UTIL_TEMP_FAILURE_RETRY(dup2(devnull, STDOUT_FILENO));
XCC_UTIL_TEMP_FAILURE_RETRY(dup2(devnull, STDERR_FILENO));

传递参数到管道的写端

1
2
3
4
5
6
7
8
9
10
11
12
//write args to pipe
struct iovec iovs[5] = {
{.iov_base = &xc_core_spot, .iov_len = sizeof(xcc_spot_t)},
{.iov_base = xc_core_log_pathname, .iov_len = xc_core_spot.log_pathname_len},
{.iov_base = xc_core_app_id, .iov_len = xc_core_spot.app_id_len},
{.iov_base = xc_core_app_version, .iov_len = xc_core_spot.app_version_len},
{.iov_base = xc_core_dump_all_threads_whitelist, .iov_len = xc_core_spot.dump_all_threads_whitelist_len}
};
int iovs_cnt = (0 == xc_core_spot.dump_all_threads_whitelist_len ? 4 : 5);
errno = 0;
// 写入管道
ssize_t ret = XCC_UTIL_TEMP_FAILURE_RETRY(writev(pipefd[1], iovs, iovs_cnt));

接下来将管道的读端复制到标准输入(fd 0),这样随后被执行的 dumper 程序就可以从标准输入读取这些参数

1
2
3
4
5
//copy the read-side of the args-pipe to stdin (fd: 0)
XCC_UTIL_TEMP_FAILURE_RETRY(dup2(pipefd[0], STDIN_FILENO));

syscall(SYS_close, pipefd[0]);
syscall(SYS_close, pipefd[1]);

之所以通过标准输入传递参数,是因为 dump 是通过 execl 执行另一个程序来完成的:execl 之后进程映像被替换,原有内存中的数据不再可用,而已经打开的标准输入会被保留下来

1
2
3
#define XCC_UTIL_XCRASH_DUMPER_FILENAME "libxcrash_dumper.so"
// 执行 libxcrash_dumper.so
execl(xc_core_dumper_pathname, XCC_UTIL_XCRASH_DUMPER_FILENAME, NULL);

具体 libxcrash_dumper.so 如何实现的 dump,稍后分析,先继续看

检查输出的 log 文件是否包含正确的“回溯”

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// Checks whether the log file at self->log_pathname contains a usable backtrace, i.e. a
// "backtrace:" header line immediately followed by a first-frame line that starts with
// "    #00 pc ". Only the beginning of the file is scanned (a little over 200 lines).
//
// Returns the flag r, which is set to 1 when such a backtrace is found; returns 0 when
// the file cannot be opened.
// NOTE(review): r and i are presumably initialised to 0 in the elided declarations — confirm.
int xc_recorder_check_backtrace_valid(xc_recorder_t *self)
{
    ...
    // Prefixes looked for in the log. The compared lengths are derived from the literals
    // themselves, so the comparison can never read past the end of a literal.
    // The first-frame prefix is four spaces followed by "#00 pc " (11 characters).
    static const char backtrace_header[] = "backtrace:\n";
    static const char first_frame[]      = "    #00 pc ";

    if((fd = XCC_UTIL_TEMP_FAILURE_RETRY(open(self->log_pathname, O_RDONLY | O_CLOEXEC))) < 0) return 0;

    while(NULL != xcc_util_gets(line, sizeof(line), fd))
    {
        if(0 == memcmp(line, backtrace_header, sizeof(backtrace_header) - 1))
        {
            //check the next line
            if(NULL != xcc_util_gets(line, sizeof(line), fd) &&
               0 == memcmp(line, first_frame, sizeof(first_frame) - 1))
                r = 1; //we found the backtrace
            break; // whether or not the frame matched, stop after the header
        }
        if(i++ > 200) //check the top 200 lines at most
            break;
    }

    if(fd >= 0) close(fd);
    return r;
}

之后获取设备信息

获取设备信息

设备信息的大部分逻辑和 Java 层实现类似,基本依靠读取系统文件实现,只是翻译成 native 代码。

可以关注的有几个点:

(1)signal 信号

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// Formats a one-line description of the received signal into buf (buf and len are passed
// straight to xcc_fmt_snprintf) and returns xcc_fmt_snprintf's result.
// The line has the form:
//   signal <signo> (<name>), code <code> (<codename>[ from pid <pid>, uid <uid>]), fault addr <addr>
//
// si:  signal information describing the signal
// pid: passed to xcc_util_signal_has_sender() to decide whether sender details are appended
static size_t xc_fallback_get_signal(char *buf, size_t len, siginfo_t *si, pid_t pid)
{
//fault addr
// Printed as a pointer when xcc_util_signal_has_si_addr() reports that si_addr is
// available for this signal; otherwise a "--------" placeholder is used.
char addr_desc[64];
if(xcc_util_signal_has_si_addr(si))
xcc_fmt_snprintf(addr_desc, sizeof(addr_desc), "%p", si->si_addr);
else
xcc_fmt_snprintf(addr_desc, sizeof(addr_desc), "--------");

//from
// Stays empty unless xcc_util_signal_has_sender() reports a sender for this signal.
char sender_desc[64] = "";
if(xcc_util_signal_has_sender(si, pid))
xcc_fmt_snprintf(sender_desc, sizeof(sender_desc), " from pid %d, uid %d", si->si_pid, si->si_uid);

return xcc_fmt_snprintf(buf, len, "signal %d (%s), code %d (%s%s), fault addr %s\n",
si->si_signo, xcc_util_get_signame(si),
si->si_code, xcc_util_get_sigcodename(si), sender_desc, addr_desc);
}

其中,siginfo_t *si 是 signal_handler 的参数

siginfo_t

(2)寄存器信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
return xcc_fmt_snprintf(buf, len, 
" r0 %08x r1 %08x r2 %08x r3 %08x\n"
" r4 %08x r5 %08x r6 %08x r7 %08x\n"
" r8 %08x r9 %08x r10 %08x r11 %08x\n"
" ip %08x sp %08x lr %08x pc %08x\n\n",
uc->uc_mcontext.arm_r0,
uc->uc_mcontext.arm_r1,
uc->uc_mcontext.arm_r2,
uc->uc_mcontext.arm_r3,
uc->uc_mcontext.arm_r4,
uc->uc_mcontext.arm_r5,
uc->uc_mcontext.arm_r6,
uc->uc_mcontext.arm_r7,
uc->uc_mcontext.arm_r8,
uc->uc_mcontext.arm_r9,
uc->uc_mcontext.arm_r10,
uc->uc_mcontext.arm_fp,
uc->uc_mcontext.arm_ip,
uc->uc_mcontext.arm_sp,
uc->uc_mcontext.arm_lr,
uc->uc_mcontext.arm_pc);

(3)循环打印回溯栈

1
2
3
4
5
6
7
8
9
10
// build line
len = xcc_fmt_snprintf(line, sizeof(line), " #%02zu pc %0"XCC_UTIL_FMT_ADDR" %s", j, rel_pc, name);
if(NULL != symbol)
{
len += xcc_fmt_snprintf(line + len, sizeof(line) - len, " (%s", symbol);
if(offset > 0)
len += xcc_fmt_snprintf(line + len, sizeof(line) - len, "+%"PRIuPTR, offset);
len += xcc_fmt_snprintf(line + len, sizeof(line) - len, ")");
}
len += xcc_fmt_snprintf(line + len, sizeof(line) - len, "\n");

(4)logcat

1
2
3
4
5
6
if(pid_str)
execl("/system/bin/logcat", "logcat", "-b", buffer, "-d", "-v", "threadtime",
"-t", lines_str, "--pid", pid_str, priority, (char *)NULL);
else
execl("/system/bin/logcat", "logcat", "-b", buffer, "-d", "-v", "threadtime",
"-t", lines_str, priority, (char *)NULL);

之后在子线程中回调 Java 层

1
2
if(0 != pthread_create(&thd, NULL, xc_jni_callback_do, NULL)) return;
pthread_join(thd, NULL);

回调前,记录 Java 堆栈

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
//java.lang.Thread
jclass class_Thread = (*env)->FindClass(env, "java/lang/Thread");
XC_JNI_CHECK_NULL_AND_PENDING_EXCEPTION(class_Thread, err);
jmethodID method_getAllStackTraces = (*env)->GetStaticMethodID(env, class_Thread, "getAllStackTraces", "()Ljava/util/Map;");
XC_JNI_CHECK_NULL_AND_PENDING_EXCEPTION(method_getAllStackTraces, err);
jmethodID method_getStackTrace = (*env)->GetMethodID(env, class_Thread, "getStackTrace", "()[Ljava/lang/StackTraceElement;");
XC_JNI_CHECK_NULL_AND_PENDING_EXCEPTION(method_getStackTrace, err);
jmethodID method_getName = (*env)->GetMethodID(env, class_Thread, "getName", "()Ljava/lang/String;");
XC_JNI_CHECK_NULL_AND_PENDING_EXCEPTION(method_getName, err);

//java.lang.StackTraceElement
jclass class_StackTraceElement = (*env)->FindClass(env, "java/lang/StackTraceElement");
XC_JNI_CHECK_NULL_AND_PENDING_EXCEPTION(class_StackTraceElement, err);
jmethodID method_toString = (*env)->GetMethodID(env, class_StackTraceElement, "toString", "()Ljava/lang/String;");
XC_JNI_CHECK_NULL_AND_PENDING_EXCEPTION(method_toString, err);

...

循环调用并打印

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
if(0 != xcc_util_write_str(log_fd, "java stacktrace:\n")) goto err;

jobjectArray stackTrace = (jobjectArray)(*env)->CallObjectMethod(env, thread, method_getStackTrace);
XC_JNI_CHECK_NULL_AND_PENDING_EXCEPTION(stackTrace, err);

jsize stackTraceLen = (*env)->GetArrayLength(env, stackTrace);
XC_JNI_CHECK_PENDING_EXCEPTION(err);

jsize j;
for(j = 0; j < stackTraceLen; j++)
{
jobject stackTraceElement = (*env)->GetObjectArrayElement(env, stackTrace, j);
XC_JNI_CHECK_NULL_AND_PENDING_EXCEPTION(stackTraceElement, err);

jstring stackTraceElementStr = (*env)->CallObjectMethod(env, stackTraceElement, method_toString);
XC_JNI_CHECK_NULL_AND_PENDING_EXCEPTION(stackTraceElementStr, err);

const char *c_stackTraceElementStr = (*env)->GetStringUTFChars(env, stackTraceElementStr, 0);
if(0 != xcc_util_write_str(log_fd, " at ")) goto err;
if(0 != xcc_util_write_str(log_fd, c_stackTraceElementStr)) goto err;
if(0 != xcc_util_write_str(log_fd, "\n")) goto err;
(*env)->ReleaseStringUTFChars(env, stackTraceElementStr, c_stackTraceElementStr);
}

完成后回调 callback

1
2
(*env)->CallStaticVoidMethod(env, xc_jni_class_cb, xc_jni_method_cb, j_pathname, j_emergency);
XC_JNI_CHECK_PENDING_EXCEPTION(err);

Native Dump

回过头来看 Native 如何 Dump 的,入口在 xcd_core.c 文件中。

首先是读取标准输入传递的参数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
if(0 != (r = xcd_core_read_stdin("spot", (void *)&xcd_core_spot, sizeof(xcc_spot_t)))) return r;

if(0 == xcd_core_spot.log_pathname_len) return XCC_ERRNO_INVAL;
if(NULL == (xcd_core_log_pathname = calloc(1, xcd_core_spot.log_pathname_len + 1))) return XCC_ERRNO_NOMEM;
if(0 != (r = xcd_core_read_stdin("path", (void *)xcd_core_log_pathname, xcd_core_spot.log_pathname_len))) return r;

if(0 == xcd_core_spot.app_id_len) return XCC_ERRNO_INVAL;
if(NULL == (xcd_core_app_id = calloc(1, xcd_core_spot.app_id_len + 1))) return XCC_ERRNO_NOMEM;
if(0 != (r = xcd_core_read_stdin("appid", (void *)xcd_core_app_id, xcd_core_spot.app_id_len))) return r;

if(0 == xcd_core_spot.app_version_len) return XCC_ERRNO_INVAL;
if(NULL == (xcd_core_app_version = calloc(1, xcd_core_spot.app_version_len + 1))) return XCC_ERRNO_NOMEM;
if(0 != (r = xcd_core_read_stdin("appver", (void *)xcd_core_app_version, xcd_core_spot.app_version_len))) return r;

...

然后为 dumper 自身注册 crash 信号处理函数,以便在 dumper 自身崩溃时也能把错误信息记录到日志文件中

1
2
3
4
5
6
7
8
9
10
11
if(xcd_core_log_fd >= 0)
{
//dump signal, code, backtrace
if(0 != xcc_util_write_format_safe(xcd_core_log_fd,
"\n\n"
"xcrash error debug:\n"
"dumper has crashed (signal: %d, code: %d)\n",
si->si_signo, si->si_code)) goto err;
if(0 != xcc_unwind_get(uc, NULL, buf, sizeof(buf))) goto err;
if(0 != xcc_util_write_str(xcd_core_log_fd, buf)) goto err;
}

查找发生 Crash 的线程

首先查找出所有的线程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// Enumerates the threads of process self->pid by listing /proc/<pid>/task and appends one
// xcd_thread_info_t per thread to self->thds, incrementing self->nthds for each.
//
// Returns 0 on success, XCC_ERRNO_SYS if the task directory cannot be opened, or
// XCC_ERRNO_NOMEM if a thread record cannot be allocated (records appended before the
// failure stay in self->thds).
static int xcd_process_load_threads(xcd_process_t *self)
{
    ...
    // Under /proc/[PID]/task there is one sub-directory per thread.
    snprintf(buf, sizeof(buf), "/proc/%d/task", self->pid);
    if(NULL == (dir = opendir(buf))) return XCC_ERRNO_SYS;
    while(NULL != (ent = readdir(dir)))
    {
        if(0 == strcmp(ent->d_name, ".")) continue;
        if(0 == strcmp(ent->d_name, "..")) continue;
        // Presumably skips entries whose name is not a numeric thread id.
        if(0 != xcc_util_atoi(ent->d_name, &tid)) continue;

        if(NULL == (thd = malloc(sizeof(xcd_thread_info_t))))
        {
            // Close the directory stream before bailing out so it is not leaked.
            closedir(dir);
            return XCC_ERRNO_NOMEM;
        }
        xcd_thread_init(&(thd->t), self->pid, tid);

        TAILQ_INSERT_TAIL(&(self->thds), thd, link);
        self->nthds++;
    }
    closedir(dir);

    return 0;
}

再遍历线程列表,确认发生 Crash 的线程(crash_tid)是否在其中

1
2
3
4
5
TAILQ_FOREACH(thd, &((*self)->thds), link)
{
if(thd->t.tid == (*self)->crash_tid)
return 0; //OK
}

暂停进程中搜集的所有线程,以便 dump

1
2
3
4
5
6
// Suspends every thread recorded in self->thds by calling xcd_thread_suspend() on each
// entry of the list, in list order.
void xcd_process_suspend_threads(xcd_process_t *self)
{
xcd_thread_info_t *thd;
TAILQ_FOREACH(thd, &(self->thds), link)
xcd_thread_suspend(&(thd->t));
}

加载 /system/build.prop 文件内容

/system/build.prop

加载进程信息

读取所有线程的信息

1
2
3
4
5
6
7
8
9
10
11
TAILQ_FOREACH(thd, &(self->thds), link)
{
//load thread info
xcd_thread_load_info(&(thd->t));

//load thread regs
if(thd->t.tid != self->crash_tid)
xcd_thread_load_regs(&(thd->t));
else
xcd_thread_load_regs_from_ucontext(&(thd->t), self->uc);
}

其中,Crash 线程的寄存器信息是从信号处理函数拿到的 ucontext 中读取的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Copies the register values stored in the context uc into self->r:
// uc_mcontext.regs[i] is stored in the XCD_REGS_Xi slot, and uc_mcontext.sp / pc are
// stored in the XCD_REGS_SP / XCD_REGS_PC slots.
// NOTE(review): the X0..X9 slot names and the regs[] layout look like the arm64 variant
// of this function — confirm against the per-architecture sources.
void xcd_regs_load_from_ucontext(xcd_regs_t *self, ucontext_t *uc)
{
self->r[XCD_REGS_X0] = uc->uc_mcontext.regs[0];
self->r[XCD_REGS_X1] = uc->uc_mcontext.regs[1];
self->r[XCD_REGS_X2] = uc->uc_mcontext.regs[2];
self->r[XCD_REGS_X3] = uc->uc_mcontext.regs[3];
self->r[XCD_REGS_X4] = uc->uc_mcontext.regs[4];
self->r[XCD_REGS_X5] = uc->uc_mcontext.regs[5];
self->r[XCD_REGS_X6] = uc->uc_mcontext.regs[6];
self->r[XCD_REGS_X7] = uc->uc_mcontext.regs[7];
self->r[XCD_REGS_X8] = uc->uc_mcontext.regs[8];
self->r[XCD_REGS_X9] = uc->uc_mcontext.regs[9];
...
// Stack pointer and program counter are separate fields of uc_mcontext.
self->r[XCD_REGS_SP] = uc->uc_mcontext.sp;
self->r[XCD_REGS_PC] = uc->uc_mcontext.pc;
}

然后加载进程的 maps(内存映射)信息

1
2
3
//load maps
if(0 != (r = xcd_maps_create(&(self->maps), self->pid)))
XCD_LOG_ERROR("PROCESS: create maps failed, errno=%d", r);

记录系统信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
if(0 != (r = xcc_util_write_format(log_fd, "Tombstone maker: '%s'\n",    XCC_VERSION_STR))) return r;
if(0 != (r = xcc_util_write_format(log_fd, "Crash type: '%s'\n", XCC_UTIL_CRASH_TYPE))) return r;
if(0 != (r = xcd_sys_record_time (log_fd, "Start time", start_time))) return r;
if(0 != (r = xcd_sys_record_time (log_fd, "Crash time", crash_time))) return r;
if(0 != (r = xcc_util_write_format(log_fd, "App ID: '%s'\n", app_id))) return r;
if(0 != (r = xcc_util_write_format(log_fd, "App version: '%s'\n", app_version))) return r;
...
if(0 != (r = xcc_util_write_format(log_fd, "ABI list: '%s'\n", props->abi_list))) return r;
if(0 != (r = xcc_util_write_format(log_fd, "Manufacturer: '%s'\n", props->manufacturer))) return r;
if(0 != (r = xcc_util_write_format(log_fd, "Brand: '%s'\n", props->brand))) return r;
if(0 != (r = xcc_util_write_format(log_fd, "Model: '%s'\n", props->model))) return r;
if(0 != (r = xcc_util_write_format(log_fd, "Build fingerprint: '%s'\n", props->build_fingerprint))) return r;
if(0 != (r = xcc_util_write_format(log_fd, "Revision: '%s'\n", props->revision))) return r;
if(0 != (r = xcc_util_write_format(log_fd, "ABI: '%s'\n", XCC_UTIL_ABI_STRING))) return r;

记录进程信息

打印 Crash 线程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
TAILQ_FOREACH(thd, &(self->thds), link)
{
if(thd->t.tid == self->crash_tid)
{
if(0 != (r = xcd_thread_record_info(&(thd->t), log_fd, self->pname))) return r;
if(0 != (r = xcd_process_record_signal_info(self, log_fd))) return r;
if(0 != (r = xcd_process_record_abort_message(self, log_fd))) return r;
if(0 != (r = xcd_thread_record_regs(&(thd->t), log_fd))) return r;
if(0 == xcd_thread_load_frames(&(thd->t), self->maps))
{
if(0 != (r = xcd_thread_record_backtrace(&(thd->t), log_fd))) return r;
if(0 != (r = xcd_thread_record_buildid(&(thd->t), log_fd, xcc_util_signal_has_si_addr(self->si) ? (uintptr_t)self->si->si_addr : 0))) return r;
if(0 != (r = xcd_thread_record_stack(&(thd->t), log_fd))) return r;
if(0 != (r = xcd_thread_record_memory(&(thd->t), log_fd))) return r;
}
if(dump_map) if(0 != (r = xcd_maps_record(self->maps, log_fd))) return r;
if(0 != (r = xcd_process_record_logcat(self, log_fd, logcat_system_lines, logcat_events_lines, logcat_main_lines, api_level))) return r;
if(dump_fds) if(0 != (r = xcd_process_record_fds(self, log_fd))) return r;
if(0 != (r = xcd_meminfo_record(log_fd, self->pid))) return r;

break;
}
}

以及非 Crash 线程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
TAILQ_FOREACH(thd, &(self->thds), link)
{
if(thd->t.tid != self->crash_tid)
{
//check regex for thread name
if(NULL != re && re_cnt > 0 && !xcd_process_if_need_dump(thd->t.tname, re, re_cnt))
{
continue;
}
thd_matched_regex++;

//check dump count limit
if(dump_all_threads_count_max > 0 && thd_dumped >= dump_all_threads_count_max)
{
thd_ignored_by_limit++;
continue;
}

if(0 != (r = xcc_util_write_str(log_fd, XCC_UTIL_THREAD_SEP))) goto end;
if(0 != (r = xcd_thread_record_info(&(thd->t), log_fd, self->pname))) goto end;
if(0 != (r = xcd_thread_record_regs(&(thd->t), log_fd))) goto end;
if(0 == xcd_thread_load_frames(&(thd->t), self->maps))
{
if(0 != (r = xcd_thread_record_backtrace(&(thd->t), log_fd))) goto end;
if(0 != (r = xcd_thread_record_stack(&(thd->t), log_fd))) goto end;
}
thd_dumped++;
}
}

最后恢复所有线程的运行

1
2
//resume all threads in the process
xcd_process_resume_threads(xcd_core_proc);

小结

That’s all. 可见,xCrash 的实现是很精简、清晰的,通过将 Crash 信息直接写入文件能够大概率成功捕获到我们需要的有用信息。

在日常开发中,xCrash 确实帮助到自己解决一些疑难的 Native 层 Crash,通过阅读源码也更能理解到 Crash 捕获的整个机制,希望本文对读者也有所帮助和启发。