com.android.server.Watchdog.java
程序员文章站
2022-09-04 17:02:00
/*
* copyright (c) 2008 the android open source project
*
* licensed under the apache licen...
/* * copyright (c) 2008 the android open source project * * licensed under the apache license, version 2.0 (the "license"); * you may not use this file except in compliance with the license. * you may obtain a copy of the license at * *https://www.apache.org/licenses/license-2.0 * * unless required by applicable law or agreed to in writing, software * distributed under the license is distributed on an "as is" basis, * without warranties or conditions of any kind, either express or implied. * see the license for the specific language governing permissions and * limitations under the license. */ package com.android.server; import android.app.iactivitycontroller; import android.os.binder; import android.os.remoteexception; import com.android.server.am.activitymanagerservice; import android.content.broadcastreceiver; import android.content.contentresolver; import android.content.context; import android.content.intent; import android.content.intentfilter; import android.os.debug; import android.os.handler; import android.os.ipowermanager; import android.os.looper; import android.os.process; import android.os.servicemanager; import android.os.systemclock; import android.os.systemproperties; import android.util.eventlog; import android.util.log; import android.util.slog; import java.io.file; import java.io.filewriter; import java.io.ioexception; import java.util.arraylist; /** this class calls its monitor every minute. killing this process if they don't return **/ public class watchdog extends thread { static final string tag = "watchdog"; static final boolean locallogv = false || false; // set this to true to use debug default values. static final boolean db = false; // set this to true to have the watchdog record kernel thread stacks when it fires static final boolean record_kernel_threads = true; static final long default_timeout = db ? 10*1000 : 60*1000; static final long check_interval = default_timeout / 2; // these are temporally ordered: larger values as lateness increases static final int completed = 0; static final int waiting = 1; static final int waited_half = 2; static final int overdue = 3; // which native processes to dump into dropbox's stack traces public static final string[] native_stacks_of_interest = new string[] { "/system/bin/mediaserver", "/system/bin/sdcard", "/system/bin/surfaceflinger" }; static watchdog swatchdog; /* this handler will be used to post message back onto the main thread */ final arraylist mhandlercheckers = new arraylist(); final handlerchecker mmonitorchecker; contentresolver mresolver; activitymanagerservice mactivity; int mphonepid; iactivitycontroller mcontroller; boolean mallowrestart = true; /** * used for checking status of handle threads and scheduling monitor callbacks. */ public final class handlerchecker implements runnable { private final handler mhandler; private final string mname; private final long mwaitmax; private final arraylist mmonitors = new arraylist(); private boolean mcompleted; private monitor mcurrentmonitor; private long mstarttime; handlerchecker(handler handler, string name, long waitmaxmillis) { mhandler = handler; mname = name; mwaitmax = waitmaxmillis; mcompleted = true; } public void addmonitor(monitor monitor) { mmonitors.add(monitor); } public void schedulechecklocked() { if (mmonitors.size() == 0 && mhandler.getlooper().isidling()) { // if the target looper is or just recently was idling, then // there is no reason to enqueue our checker on it since that // is as good as it not being deadlocked. this avoid having // to do a context switch to check the thread. note that we // only do this if mcheckreboot is false and we have no // monitors, since those would need to be executed at this point. mcompleted = true; return; } if (!mcompleted) { // we already have a check in flight, so no need return; } mcompleted = false; mcurrentmonitor = null; mstarttime = systemclock.uptimemillis(); mhandler.postatfrontofqueue(this); } public boolean isoverduelocked() { return (!mcompleted) && (systemclock.uptimemillis() > mstarttime + mwaitmax); } public int getcompletionstatelocked() { if (mcompleted) { return completed; } else { long latency = systemclock.uptimemillis() - mstarttime; if (latency < mwaitmax/2) { return waiting; } else if (latency < mwaitmax) { return waited_half; } } return overdue; } public thread getthread() { return mhandler.getlooper().getthread(); } public string getname() { return mname; } public string describeblockedstatelocked() { if (mcurrentmonitor == null) { return "blocked in handler on " + mname + " (" + getthread().getname() + ")"; } else { return "blocked in monitor " + mcurrentmonitor.getclass().getname() + " on " + mname + " (" + getthread().getname() + ")"; } } @override public void run() { final int size = mmonitors.size(); for (int i = 0 ; i < size ; i++) { synchronized (watchdog.this) { mcurrentmonitor = mmonitors.get(i); } mcurrentmonitor.monitor(); } synchronized (watchdog.this) { mcompleted = true; mcurrentmonitor = null; } } } final class rebootrequestreceiver extends broadcastreceiver { @override public void onreceive(context c, intent intent) { if (intent.getintextra("nowait", 0) != 0) { rebootsystem("received action_reboot broadcast"); return; } slog.w(tag, "unsupported action_reboot broadcast: " + intent); } } public interface monitor { void monitor(); } public static watchdog getinstance() { if (swatchdog == null) { swatchdog = new watchdog(); } return swatchdog; } private watchdog() { super("watchdog"); // initialize handler checkers for each common thread we want to check. note // that we are not currently checking the background thread, since it can // potentially hold longer running operations with no guarantees about the timeliness // of operations there. // the shared foreground thread is the main checker. it is where we // will also dispatch monitor checks and do other work. mmonitorchecker = new handlerchecker(fgthread.gethandler(), "foreground thread", default_timeout); mhandlercheckers.add(mmonitorchecker); // add checker for main thread. we only do a quick check since there // can be ui running on the thread. mhandlercheckers.add(new handlerchecker(new handler(looper.getmainlooper()), "main thread", default_timeout)); // add checker for shared ui thread. mhandlercheckers.add(new handlerchecker(uithread.gethandler(), "ui thread", default_timeout)); // and also check io thread. mhandlercheckers.add(new handlerchecker(iothread.gethandler(), "i/o thread", default_timeout)); // and the display thread. mhandlercheckers.add(new handlerchecker(displaythread.gethandler(), "display thread", default_timeout)); } public void init(context context, activitymanagerservice activity) { mresolver = context.getcontentresolver(); mactivity = activity; context.registerreceiver(new rebootrequestreceiver(), new intentfilter(intent.action_reboot), android.manifest.permission.reboot, null); } public void processstarted(string name, int pid) { synchronized (this) { if ("com.android.phone".equals(name)) { mphonepid = pid; } } } public void setactivitycontroller(iactivitycontroller controller) { synchronized (this) { mcontroller = controller; } } public void setallowrestart(boolean allowrestart) { synchronized (this) { mallowrestart = allowrestart; } } public void addmonitor(monitor monitor) { synchronized (this) { if (isalive()) { throw new runtimeexception("monitors can't be added once the watchdog is running"); } mmonitorchecker.addmonitor(monitor); } } public void addthread(handler thread) { addthread(thread, default_timeout); } public void addthread(handler thread, long timeoutmillis) { synchronized (this) { if (isalive()) { throw new runtimeexception("threads can't be added once the watchdog is running"); } final string name = thread.getlooper().getthread().getname(); mhandlercheckers.add(new handlerchecker(thread, name, timeoutmillis)); } } /** * perform a full reboot of the system. */ void rebootsystem(string reason) { slog.i(tag, "rebooting system because: " + reason); ipowermanager pms = (ipowermanager)servicemanager.getservice(context.power_service); try { pms.reboot(false, reason, false); } catch (remoteexception ex) { } } private int evaluatecheckercompletionlocked() { int state = completed; for (int i=0; i getblockedcheckerslocked() { arraylist checkers = new arraylist(); for (int i=0; i checkers) { stringbuilder builder = new stringbuilder(128); for (int i=0; i 0) { builder.append(", "); } builder.append(checkers.get(i).describeblockedstatelocked()); } return builder.tostring(); } @override public void run() { boolean waitedhalf = false; while (true) { final arraylist blockedcheckers; final string subject; final boolean allowrestart; int debuggerwasconnected = 0; synchronized (this) { long timeout = check_interval; // make sure we (re)spin the checkers that have become idle within // this wait-and-check interval for (int i=0; i 0) { debuggerwasconnected--; } // note: we use uptimemillis() here because we do not want to increment the time we // wait while asleep. if the device is asleep then the thing that we are waiting // to timeout on is asleep as well and won't have a chance to run, causing a false // positive on when to kill things. long start = systemclock.uptimemillis(); while (timeout > 0) { if (debug.isdebuggerconnected()) { debuggerwasconnected = 2; } try { wait(timeout); } catch (interruptedexception e) { log.wtf(tag, e); } if (debug.isdebuggerconnected()) { debuggerwasconnected = 2; } timeout = check_interval - (systemclock.uptimemillis() - start); } final int waitstate = evaluatecheckercompletionlocked(); if (waitstate == completed) { // the monitors have returned; reset waitedhalf = false; continue; } else if (waitstate == waiting) { // still waiting but within their configured intervals; back off and recheck continue; } else if (waitstate == waited_half) { if (!waitedhalf) { // we've waited half the deadlock-detection interval. pull a stack // trace and wait another half. arraylist pids = new arraylist(); pids.add(process.mypid()); activitymanagerservice.dumpstacktraces(true, pids, null, null, native_stacks_of_interest); waitedhalf = true; } continue; } // something is overdue! blockedcheckers = getblockedcheckerslocked(); subject = describecheckerslocked(blockedcheckers); allowrestart = mallowrestart; } // if we got here, that means that the system is most likely hung. // first collect stack traces from all threads of the system process. // then kill this process so that the system will restart. eventlog.writeevent(eventlogtags.watchdog, subject); arraylist pids = new arraylist(); pids.add(process.mypid()); if (mphonepid > 0) pids.add(mphonepid); // pass !waitedhalf so that just in case we somehow wind up here without having // dumped the halfway stacks, we properly re-initialize the trace file. final file stack = activitymanagerservice.dumpstacktraces( !waitedhalf, pids, null, null, native_stacks_of_interest); // give some extra time to make sure the stack traces get written. // the system's been hanging for a minute, another second or two won't hurt much. systemclock.sleep(2000); // pull our own kernel thread stacks as well if we're configured for that if (record_kernel_threads) { dumpkernelstacktraces(); } // trigger the kernel to dump all blocked threads, and backtraces on all cpus to the kernel log dosysrq('w'); dosysrq('l'); // try to add the error to the dropbox, but assuming that the activitymanager // itself may be deadlocked. (which has happened, causing this statement to // deadlock and the watchdog as a whole to be ineffective) thread dropboxthread = new thread("watchdogwritetodropbox") { public void run() { mactivity.adderrortodropbox( "watchdog", null, "system_server", null, null, subject, null, stack, null); } }; dropboxthread.start(); try { dropboxthread.join(2000); // wait up to 2 seconds for it to return. } catch (interruptedexception ignored) {} iactivitycontroller controller; synchronized (this) { controller = mcontroller; } if (controller != null) { slog.i(tag, "reporting stuck state to activity controller"); try { binder.setdumpdisabled("service dumps disabled due to hung system process."); // 1 = keep waiting, -1 = kill system int res = controller.systemnotresponding(subject); if (res >= 0) { slog.i(tag, "activity controller requested to coninue to wait"); waitedhalf = false; continue; } } catch (remoteexception e) { } } // only kill the process if the debugger is not attached. if (debug.isdebuggerconnected()) { debuggerwasconnected = 2; } if (debuggerwasconnected >= 2) { slog.w(tag, "debugger connected: watchdog is *not* killing the system process"); } else if (debuggerwasconnected > 0) { slog.w(tag, "debugger was connected: watchdog is *not* killing the system process"); } else if (!allowrestart) { slog.w(tag, "restart not allowed: watchdog is *not* killing the system process"); } else { slog.w(tag, "*** watchdog killing system process: " + subject); for (int i=0; i