Android Zygote 进程的启动
扫描二维码
随时随地手机看文章
Zygote意为“受精卵”,即所有的Android进程都是由其“发育”而来的意思。本人试着把啃代码学到的内容整理出来,希望会对自己以后有用。
Init
Init进程是Linux系统上的第一个用户进程,用户为root,拥有系统中最高的权限。它会挂载文件系统,并启动ServiceManager/Zygote两大进程,正是这两大进程构成了Android系统的基础。
简单来说,Init进程通过解析init.rc文件,然后执行自己的工作,其中,Zygote进程相关的配置如下(system/core/rootdir/init.rc):
service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-system-server class main socket zygote stream 660 root system onrestart write /sys/android_power/request_state wake onrestart write /sys/power/state on onrestart restart media onrestart restart netd
从上面,可以看到Zygote实际上执行的是/system/bin/app_process应用程序,参数中包含了--zygote和--start-system-server。 另外,
socket zygote stream 660 root system
属性的配置,说明Init进程在启动zygote进程时,会为zygote进程创建一个stream型的domain socket,并把它的文件描述符保存到ANDROID_SOCKET_zygote环境变量中。(这个过程涉及init进程的实现,可以参考代码system/core/init/init.c)
app_process
Init进程启动Zygote进程以后,代码执行到app_process的主入口(frameworks/base/cmds/app_process/app_main.cpp):
int main(int argc, const char* const argv[]) { // These are global variables in ProcessState.cpp mArgC = argc; mArgV = argv; mArgLen = 0; for (int i=0; i<argc; i++) { mArgLen += strlen(argv[i]) + 1; } mArgLen--; AppRuntime runtime; const char* argv0 = argv[0]; // Process command line arguments // ignore argv[0] argc--; argv++; // Everything up to '--' or first non '-' arg goes to the vm int i = runtime.addVmArguments(argc, argv); // Parse runtime arguments. Stop at first unrecognized option. bool zygote = false; bool startSystemServer = false; bool application = false; const char* parentDir = NULL; const char* niceName = NULL; const char* className = NULL; while (i < argc) { const char* arg = argv[i++]; if (!parentDir) { parentDir = arg; } else if (strcmp(arg, "--zygote") == 0) { zygote = true; niceName = "zygote"; } else if (strcmp(arg, "--start-system-server") == 0) { startSystemServer = true; } else if (strcmp(arg, "--application") == 0) { application = true; } else if (strncmp(arg, "--nice-name=", 12) == 0) { niceName = arg + 12; } else { className = arg; break; } } if (niceName && *niceName) { setArgv0(argv0, niceName); set_process_name(niceName); } runtime.mParentDir = parentDir; if (zygote) { runtime.start("com.android.internal.os.ZygoteInit", startSystemServer ? "start-system-server" : ""); } else if (className) { // Remainder of args get passed to startup class main() runtime.mClassName = className; runtime.mArgC = argc - i; runtime.mArgV = argv + i; runtime.start("com.android.internal.os.RuntimeInit", application ? "application" : "tool"); } else { fprintf(stderr, "Error: no class name or --zygote supplied.n"); app_usage(); LOG_ALWAYS_FATAL("app_process: no class name or --zygote supplied."); return 10; } }
main函数首先分析参数,决定下一步要做什么:
while (i < argc) { const char* arg = argv[i++]; if (!parentDir) { parentDir = arg; } else if (strcmp(arg, "--zygote") == 0) { zygote = true; niceName = "zygote"; } else if (strcmp(arg, "--start-system-server") == 0) { startSystemServer = true; } else if (strcmp(arg, "--application") == 0) { application = true; } else if (strncmp(arg, "--nice-name=", 12) == 0) { niceName = arg + 12; } else { className = arg; break; } }
上一步中,我们知道Init进程启动Zygote进程的时候,包含了--zygote参数和--start-system-server参数,所以,现在zygote和startSystemServer都为true,所以接下来执行:
if (zygote) { runtime.start("com.android.internal.os.ZygoteInit", startSystemServer ? "start-system-server" : ""); } else if (className) { ...... } else { ...... }
这次的参数分别为“com.android.internal.os.ZygoteInit”和“start-system-server”。
AndroidRuntime
runtime的类型为AppRuntime,AppRuntime的定义如下(frameworks/base/cmds/app_process/app_main.cpp):
class AppRuntime : public AndroidRuntime { public: AppRuntime() : mParentDir(NULL) , mClassName(NULL) , mClass(NULL) , mArgC(0) , mArgV(NULL) { } #if 0 // this appears to be unused const char* getParentDir() const { return mParentDir; } #endif const char* getClassName() const { return mClassName; } virtual void onVmCreated(JNIEnv* env) { if (mClassName == NULL) { return; // Zygote. Nothing to do here. } /* * This is a little awkward because the JNI FindClass call uses the * class loader associated with the native method we're executing in. * If called in onStarted (from RuntimeInit.finishInit because we're * launching "am", for example), FindClass would see that we're calling * from a boot class' native method, and so wouldn't look for the class * we're trying to look up in CLASSPATH. Unfortunately it needs to, * because the "am" classes are not boot classes. * * The easiest fix is to call FindClass here, early on before we start * executing boot class Java code and thereby deny ourselves access to * non-boot classes. */ char* slashClassName = toSlashClassName(mClassName); mClass = env->FindClass(slashClassName); if (mClass == NULL) { ALOGE("ERROR: could not find class '%s'n", mClassName); } free(slashClassName); mClass = reinterpret_cast(env->NewGlobalRef(mClass)); } virtual void onStarted() { spproc = ProcessState::self(); ALOGV("App process: starting thread pool.n"); proc->startThreadPool(); AndroidRuntime* ar = AndroidRuntime::getRuntime(); ar->callMain(mClassName, mClass, mArgC, mArgV); IPCThreadState::self()->stopProcess(); } virtual void onZygoteInit() { spproc = ProcessState::self(); ALOGV("App process: starting thread pool.n"); proc->startThreadPool(); } virtual void onExit(int code) { if (mClassName == NULL) { // if zygote IPCThreadState::self()->stopProcess(); } AndroidRuntime::onExit(code); } const char* mParentDir; const char* mClassName; jclass mClass; int mArgC; const char* const* mArgV; }; }
可以看到,AppRuntime并没有重写(override)start函数,所以,目前会执行其父类AndroidRuntime的start函数(frameworks/base/core/jni/AndroidRuntime.cpp):
/* * Start the Android runtime. This involves starting the virtual machine * and calling the "static void main(String[] args)" method in the class * named by "className". * * Passes the main function two arguments, the class name and the specified * options string. */ void AndroidRuntime::start(const char* className, const char* options) { ALOGD("n>>>>>> AndroidRuntime START %s <<<<<<n", className != NULL ? className : "(unknown)"); blockSigpipe(); /* * 'startSystemServer == true' means runtime is obsolete and not run from * init.rc anymore, so we print out the boot start event here. */ if (strcmp(options, "start-system-server") == 0) { /* track our progress through the boot sequence */ const int LOG_BOOT_PROGRESS_START = 3000; LOG_EVENT_LONG(LOG_BOOT_PROGRESS_START, ns2ms(systemTime(SYSTEM_TIME_MONOTONIC))); } const char* rootDir = getenv("ANDROID_ROOT"); if (rootDir == NULL) { rootDir = "/system"; if (!hasDir("/system")) { LOG_FATAL("No root directory specified, and /android does not exist."); return; } setenv("ANDROID_ROOT", rootDir, 1); } //const char* kernelHack = getenv("LD_ASSUME_KERNEL"); //ALOGD("Found LD_ASSUME_KERNEL='%s'n", kernelHack); /* start the virtual machine */ JNIEnv* env; if (startVm(&mJavaVM, &env) != 0) { return; } onVmCreated(env); /* * Register android functions. */ if (startReg(env) < 0) { ALOGE("Unable to register all android nativesn"); return; } /* * We want to call main() with a String array with arguments in it. * At present we have two arguments, the class name and an option string. * Create an array to hold them. 
*/ jclass stringClass; jobjectArray strArray; jstring classNameStr; jstring optionsStr; stringClass = env->FindClass("java/lang/String"); assert(stringClass != NULL); strArray = env->NewObjectArray(2, stringClass, NULL); assert(strArray != NULL); classNameStr = env->NewStringUTF(className); assert(classNameStr != NULL); env->SetObjectArrayElement(strArray, 0, classNameStr); optionsStr = env->NewStringUTF(options); env->SetObjectArrayElement(strArray, 1, optionsStr); /* * Start VM. This thread becomes the main thread of the VM, and will * not return until the VM exits. */ char* slashClassName = toSlashClassName(className); jclass startClass = env->FindClass(slashClassName); if (startClass == NULL) { ALOGE("JavaVM unable to locate class '%s'n", slashClassName); /* keep going */ } else { jmethodID startMeth = env->GetStaticMethodID(startClass, "main", "([Ljava/lang/String;)V"); if (startMeth == NULL) { ALOGE("JavaVM unable to find main() in '%s'n", className); /* keep going */ } else { env->CallStaticVoidMethod(startClass, startMeth, strArray); #if 0 if (env->ExceptionCheck()) threadExitUncaughtException(env); #endif } } free(slashClassName); ALOGD("Shutting down VMn"); if (mJavaVM->DetachCurrentThread() != JNI_OK) ALOGW("Warning: unable to detach main threadn"); if (mJavaVM->DestroyJavaVM() != 0) ALOGW("Warning: VM did not shut down cleanlyn"); }
AndroidRuntime::start主要做了三项工作:
1. startVm启动Android虚拟机,并调用onVmCreated函数。前面,我们有看到AppRuntime重写了onVmCreated函数(不过,对于app_process作为Zygote进程启动的情况,不做任何处理就返回);
2. startReg注册需要的Native函数。基本上Android的每个模块都有一些native实现需要和Java代码关联起来,事先注册能够提高性能(另一种方案是第一次调用的时候查找实现函数并完成注册);
3. 查找className指定的class的main函数,并以options为参数,调用main函数。这里不对代码做解释,用java写过反射调用的人应该基本能看懂。
runtime.start最后会调用ZygoteInit.main函数,参数为类名和“start-system-server”。因为已经启动了java虚拟机,接下来可以执行Java代码了,所以,我们将进入下一个世界。
ZygoteInit
ZygoteInit从类名看,从现在开始是真正的“Zygote”(frameworks/base/core/java/com/android/internal/os/ZygoteInit.java):
public static void main(String argv[]) { try { // Start profiling the zygote initialization. SamplingProfilerIntegration.start(); registerZygoteSocket(); EventLog.writeEvent(LOG_BOOT_PROGRESS_PRELOAD_START, SystemClock.uptimeMillis()); preload(); EventLog.writeEvent(LOG_BOOT_PROGRESS_PRELOAD_END, SystemClock.uptimeMillis()); // Finish profiling the zygote initialization. SamplingProfilerIntegration.writeZygoteSnapshot(); // Do an initial gc to clean up after startup gc(); // If requested, start system server directly from Zygote if (argv.length != 2) { throw new RuntimeException(argv[0] + USAGE_STRING); } if (argv[1].equals("start-system-server")) { startSystemServer(); } else if (!argv[1].equals("")) { throw new RuntimeException(argv[0] + USAGE_STRING); } Log.i(TAG, "Accepting command socket connections"); if (ZYGOTE_FORK_MODE) { runForkMode(); } else { runSelectLoopMode(); } closeServerSocket(); } catch (MethodAndArgsCaller caller) { caller.run(); } catch (RuntimeException ex) { Log.e(TAG, "Zygote died with exception", ex); closeServerSocket(); throw ex; } }
从代码看,main函数完成了四项工作:
1. registerZygoteSocket;
2. preload;
3. startSystemServer,因为之前AndroidRuntime是以“start-system-server”为参数调用main函数的,所以这里会执行startSystemServer;
4. runSelectLoopMode,ZYGOTE_FORK_MODE常量恒定为false,所以会执行runSelectLoopMode。
在继续分析前,先要做点铺垫。Zygote进程的作用是为了fork出新的Android进程,那Zygote是如何得知它需要fork一个进程的呢? 先看一段“客户端”的代码(frameworks/base/core/java/android/os/Process.java):
public static final ProcessStartResult start(final String processClass, final String niceName, int uid, int gid, int[] gids, int debugFlags, int mountExternal, int targetSdkVersion, String seInfo, String[] zygoteArgs) { try { return startViaZygote(processClass, niceName, uid, gid, gids, debugFlags, mountExternal, targetSdkVersion, seInfo, zygoteArgs); } catch (ZygoteStartFailedEx ex) { Log.e(LOG_TAG, "Starting VM process through Zygote failed"); throw new RuntimeException( "Starting VM process through Zygote failed", ex); } } private static void openZygoteSocketIfNeeded() throws ZygoteStartFailedEx { int retryCount; if (sPreviousZygoteOpenFailed) { /* * If we've failed before, expect that we'll fail again and * don't pause for retries. */ retryCount = 0; } else { retryCount = 10; } /* * See bug #811181: Sometimes runtime can make it up before zygote. * Really, we'd like to do something better to avoid this condition, * but for now just wait a bit... */ for (int retry = 0 ; (sZygoteSocket == null) && (retry < (retryCount + 1)) ; retry++ ) { if (retry > 0) { try { Log.i("Zygote", "Zygote not up yet, sleeping..."); Thread.sleep(ZYGOTE_RETRY_MILLIS); } catch (InterruptedException ex) { // should never happen } } try { sZygoteSocket = new LocalSocket(); sZygoteSocket.connect(new LocalSocketAddress(ZYGOTE_SOCKET, LocalSocketAddress.Namespace.RESERVED)); sZygoteInputStream = new DataInputStream(sZygoteSocket.getInputStream()); sZygoteWriter = new BufferedWriter( new OutputStreamWriter( sZygoteSocket.getOutputStream()), 256); Log.i("Zygote", "Process: zygote socket opened"); sPreviousZygoteOpenFailed = false; break; } catch (IOException ex) { if (sZygoteSocket != null) { try { sZygoteSocket.close(); } catch (IOException ex2) { Log.e(LOG_TAG,"I/O exception on close after exception", ex2); } } sZygoteSocket = null; } } if (sZygoteSocket == null) { sPreviousZygoteOpenFailed = true; throw new ZygoteStartFailedEx("connect failed"); } } private static ProcessStartResult 
zygoteSendArgsAndGetResult(ArrayListargs) throws ZygoteStartFailedEx { openZygoteSocketIfNeeded(); try { sZygoteWriter.write(Integer.toString(args.size())); sZygoteWriter.newLine(); int sz = args.size(); for (int i = 0; i < sz; i++) { String arg = args.get(i); if (arg.indexOf('n') >= 0) { throw new ZygoteStartFailedEx( "embedded newlines not allowed"); } sZygoteWriter.write(arg); sZygoteWriter.newLine(); } sZygoteWriter.flush(); // Should there be a timeout on this? ProcessStartResult result = new ProcessStartResult(); result.pid = sZygoteInputStream.readInt(); if (result.pid < 0) { throw new ZygoteStartFailedEx("fork() failed"); } result.usingWrapper = sZygoteInputStream.readBoolean(); return result; } catch (IOException ex) { try { if (sZygoteSocket != null) { sZygoteSocket.close(); } } catch (IOException ex2) { // we're going to fail anyway Log.e(LOG_TAG,"I/O exception on routine close", ex2); } sZygoteSocket = null; throw new ZygoteStartFailedEx(ex); } }
这段代码是framework层通知Zygote进程进行fork用的,代码略长,也没有必要仔细分析,只要看明白一点即可: framework层会通过Unix domain socket连接到Zygote进程,并发送字符串型的参数。
registerZygoteSocket
刚才有看到Process会通过domain socket连接到Zygote进程,那Zygote进程理所当然地要监听这个socket端口了,这就是registerZygoteSocket函数要做的(或者说它做了一半):
/**
 * Creates the server socket on which zygote receives command connections.
 * Does nothing if the socket has already been created.
 *
 * The file descriptor number is read from the environment variable named
 * by ANDROID_SOCKET_ENV.
 *
 * @throws RuntimeException if the variable is unset or not an integer, or
 *         if the server socket cannot be created from the descriptor
 */
private static void registerZygoteSocket() {
    if (sServerSocket != null) {
        return;
    }

    int socketFd;
    try {
        socketFd = Integer.parseInt(System.getenv(ANDROID_SOCKET_ENV));
    } catch (RuntimeException e) {
        throw new RuntimeException(
                ANDROID_SOCKET_ENV + " unset or invalid", e);
    }

    try {
        sServerSocket = new LocalServerSocket(
                createFileDescriptor(socketFd));
    } catch (IOException e) {
        throw new RuntimeException(
                "Error binding to local socket '" + socketFd + "'", e);
    }
}
registerZygoteSocket函数中,先从
private static final String ANDROID_SOCKET_ENV = "ANDROID_SOCKET_zygote";
环境变量中,读取一个int值作为文件描述符,并用它创建文件描述符对象,为什么呢? 如果还记得init.rc的配置的话,一切就可以解释了:
socket zygote stream 660 root system
init进程在处理这一行属性时,会先在/dev/socket下创建一个名为zygote的socket文件,然后打开这个文件,并把文件描述符保存到环境变量中。环境变量名的规则为"ANDROID_SOCKET_"+第一参数(这里为zygote)。所以,ZygoteInit就可以通过指定的环境变量来获取这个文件描述符,并由此创建LocalServerSocket。
preload
因为所有的Android进程都是从Zygote进程fork出来的,而子进程会继承Zygote的资源。换言之,如果Zygote进程持有了资源,所有android进程就都有了。所以,Zygote会预加载一些系统资源,以加速子进程的启动速度(子进程以写时复制的机制进行资源共享,所以不必担心内存占用问题。另外Android系统的启动时间可能有大概40%的时间耗费在这个preload函数上,但是从整体上来说,这是利大于弊的)。
static void preload() { preloadClasses();//加载java类 preloadResources();//加载icon,字串等资源 }
startSystemServer
Zygote进程启动以后,会由Zygote进程fork出SystemServer,继续启动系统:
/**
 * Builds the fixed argument list for the system server and forks it.
 *
 * In the forked child (fork result 0) control is handed to
 * handleSystemServerProcess.
 *
 * @return always true
 * @throws RuntimeException wrapping an IllegalArgumentException raised
 *         while parsing the arguments or forking
 */
private static boolean startSystemServer()
        throws MethodAndArgsCaller, RuntimeException {
    /* Hardcoded command line to start the system server */
    String serverArgs[] = {
        "--setuid=1000",
        "--setgid=1000",
        "--setgroups=1001,1002,1003,1004,1005,1006,1007,1008,1009,1010,1018,3001,3002,3003,3006,3007",
        "--capabilities=130104352,130104352",
        "--runtime-init",
        "--nice-name=system_server",
        "com.android.server.SystemServer",
    };

    ZygoteConnection.Arguments parsed = null;
    int forkResult;

    try {
        parsed = new ZygoteConnection.Arguments(serverArgs);
        ZygoteConnection.applyDebuggerSystemProperty(parsed);
        ZygoteConnection.applyInvokeWithSystemProperty(parsed);

        /* Request to fork the system server process */
        forkResult = Zygote.forkSystemServer(
                parsed.uid, parsed.gid,
                parsed.gids,
                parsed.debugFlags,
                null,
                parsed.permittedCapabilities,
                parsed.effectiveCapabilities);
    } catch (IllegalArgumentException e) {
        throw new RuntimeException(e);
    }

    /* For child process */
    boolean inChild = (forkResult == 0);
    if (inChild) {
        handleSystemServerProcess(parsed);
    }

    return true;
}
首先,确定SystemServer的启动参数,然后,调用Zygote.forkSystemServer函数,开始执行fork操作。这一块和SystemServer的启动过程比较相关,留待后面的文章分析。暂时只需要记住SystemServer是在这个位置启动的就好,其他的可以暂时忽略。
runSelectLoopMode
前面,我们已经得到LocalServerSocket了,现在需要开始监听LocalServerSocket了:
/** * Runs the zygote process's select loop. Accepts new connections as * they happen, and reads commands from connections one spawn-request's * worth at a time. * * @throws MethodAndArgsCaller in a child process when a main() should * be executed. */ private static void runSelectLoopMode() throws MethodAndArgsCaller { ArrayListfds = new ArrayList(); ArrayListpeers = new ArrayList(); FileDescriptor[] fdArray = new FileDescriptor[4]; fds.add(sServerSocket.getFileDescriptor()); peers.add(null); int loopCount = GC_LOOP_COUNT; while (true) { int index; /* * Call gc() before we block in select(). * It's work that has to be done anyway, and it's better * to avoid making every child do it. It will also * madvise() any free memory as a side-effect. * * Don't call it every time, because walking the entire * heap is a lot of overhead to free a few hundred bytes. */ if (loopCount <= 0) { gc(); loopCount = GC_LOOP_COUNT; } else { loopCount--; } try { fdArray = fds.toArray(fdArray); index = selectReadable(fdArray); } catch (IOException ex) { throw new RuntimeException("Error in select()", ex); } if (index < 0) { throw new RuntimeException("Error in select()"); } else if (index == 0) {//index=0 为监听端口 ZygoteConnection newPeer = acceptCommandPeer(); peers.add(newPeer); fds.add(newPeer.getFileDesciptor()); } else { boolean done; done = peers.get(index).runOnce(); if (done) { peers.remove(index); fds.remove(index); } } } }
其中,selectReadable函数为native函数,实现了一个多连接的等待功能:
/**
 * JNI implementation of ZygoteInit.selectReadable.
 *
 * Blocks in select() until at least one of the given file descriptors is
 * readable, then returns the index (within fds) of the first readable one.
 * Null array entries are skipped.
 *
 * Returns -1 on failure: fds is null, a JNI call left an exception pending,
 * select() failed (reported through jniThrowIOException), or no entry was
 * found readable.
 */
static jint com_android_internal_os_ZygoteInit_selectReadable (
        JNIEnv *env, jobject clazz, jobjectArray fds)
{
    if (fds == NULL) {
        jniThrowNullPointerException(env, "fds == null");
        return -1;
    }

    jsize length = env->GetArrayLength(fds);
    fd_set fdset;  // set of descriptors to wait on

    if (env->ExceptionOccurred() != NULL) {
        return -1;
    }

    FD_ZERO(&fdset);  // start from an empty set

    int nfds = 0;
    for (jsize i = 0; i < length; i++) {  // add every descriptor to fdset
        jobject fdObj = env->GetObjectArrayElement(fds, i);
        if (env->ExceptionOccurred() != NULL) {
            return -1;
        }
        if (fdObj == NULL) {
            continue;
        }
        int fd = jniGetFDFromFileDescriptor(env, fdObj);
        if (env->ExceptionOccurred() != NULL) {
            return -1;
        }

        // NOTE(review): fd is not checked against FD_SETSIZE before FD_SET.
        FD_SET(fd, &fdset);

        // select() takes the highest descriptor number plus one.
        if (fd >= nfds) {
            nfds = fd + 1;
        }
    }

    int err;
    do {
        // Blocks (no timeout) until a descriptor is readable. select()
        // returns the number of ready descriptors, not an index; the ready
        // ones are left set in fdset. Retried when interrupted by a signal.
        err = select (nfds, &fdset, NULL, NULL, NULL);
    } while (err < 0 && errno == EINTR);

    if (err < 0) {
        jniThrowIOException(env, errno);
        return -1;
    }

    // Map the ready set back to an index in the Java array.
    for (jsize i = 0; i < length; i++) {
        jobject fdObj = env->GetObjectArrayElement(fds, i);
        if (env->ExceptionOccurred() != NULL) {
            return -1;
        }
        if (fdObj == NULL) {
            continue;
        }
        int fd = jniGetFDFromFileDescriptor(env, fdObj);
        if (env->ExceptionOccurred() != NULL) {
            return -1;
        }
        if (FD_ISSET(fd, &fdset)) {
            return (jint)i;  // index of the first readable connection
        }
    }
    return -1;
}
有数据可读的连接的序号为0,则为监听端口,应该accept这个连接:
/**
 * Blocks until one command connection arrives on the server socket and
 * wraps it in a ZygoteConnection.
 *
 * @return the newly accepted connection
 * @throws RuntimeException if accepting the connection fails with an
 *         IOException
 */
private static ZygoteConnection acceptCommandPeer() {
    ZygoteConnection peer;
    try {
        peer = new ZygoteConnection(sServerSocket.accept());
    } catch (IOException ioe) {
        throw new RuntimeException(
                "IOException during accept()", ioe);
    }
    return peer;
}
有数据可读的连接的序号不为0,则说明有fork指令过来,执行ZygoteConnection.runOnce(),并视需要移除这个连接。这个过程和App进程的启动相关,之后的文章会做进一步分析。
总结
1. Zygote进程由Init进程启动;
2. Zygote进程的实体为app_process;
3. Zygote初始化Android虚拟机以后,开始执行Java类ZygoteInit;
4. ZygoteInit会启动SystemServer;
5. ZygoteInit会监听domain socket “/dev/socket/zygote”,以执行framework层指定的fork操作。