Files
chromium_depot_tools/subprocess2.py
maruel@chromium.org 65be6f6bcf Add callback support for stdout and stderr.
It's currently an inefficient thread implementation. Interestingly
enough, callback support is significantly faster on cygwin than on
native python.

Writing an efficient implementation is punted for a later change,
one per implementation.

Stops using a temporary file since it's not necessary anymore.

The goal is to reduce the number of places where a similar paradigm
is used by having a canonical generic implementation.

R=dpranke@chromium.org
BUG=
TEST=Tested manually on Windows, cygwin, linux, OSX 10.6


Review URL: http://codereview.chromium.org/8374026

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@109239 0039d316-1c4b-4281-b951-d872f2087c98
2011-11-09 13:57:57 +00:00

455 lines
13 KiB
Python

# coding=utf8
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Collection of subprocess wrapper functions.
In theory you shouldn't need anything else in subprocess, or this module failed.
"""
from __future__ import with_statement
import cStringIO
import errno
import logging
import os
import Queue
import subprocess
import sys
import time
import threading
# Constants forwarded from subprocess.
PIPE = subprocess.PIPE
STDOUT = subprocess.STDOUT
# Sends stdout or stderr to os.devnull.
VOID = object()
# Error code when a process was killed because it timed out.
TIMED_OUT = -2001
# Globals.
# Set to True if you somehow need to disable this hack.
SUBPROCESS_CLEANUP_HACKED = False
class CalledProcessError(subprocess.CalledProcessError):
"""Augment the standard exception with more data."""
def __init__(self, returncode, cmd, cwd, stdout, stderr):
super(CalledProcessError, self).__init__(returncode, cmd)
self.stdout = stdout
self.stderr = stderr
self.cwd = cwd
def __str__(self):
out = 'Command %s returned non-zero exit status %s' % (
' '.join(self.cmd), self.returncode)
if self.cwd:
out += ' in ' + self.cwd
return '\n'.join(filter(None, (out, self.stdout, self.stderr)))
class CygwinRebaseError(CalledProcessError):
"""Occurs when cygwin's fork() emulation fails due to rebased dll."""
## Utility functions
def kill_pid(pid):
"""Kills a process by its process id."""
try:
# Unable to import 'module'
# pylint: disable=E1101,F0401
import signal
return os.kill(pid, signal.SIGKILL)
except ImportError:
pass
def kill_win(process):
"""Kills a process with its windows handle.
Has no effect on other platforms.
"""
try:
# Unable to import 'module'
# pylint: disable=F0401
import win32process
# Access to a protected member _handle of a client class
# pylint: disable=W0212
return win32process.TerminateProcess(process._handle, -1)
except ImportError:
pass
def add_kill():
"""Adds kill() method to subprocess.Popen for python <2.6"""
if hasattr(subprocess.Popen, 'kill'):
return
if sys.platform == 'win32':
subprocess.Popen.kill = kill_win
else:
subprocess.Popen.kill = lambda process: kill_pid(process.pid)
def hack_subprocess():
"""subprocess functions may throw exceptions when used in multiple threads.
See http://bugs.python.org/issue1731717 for more information.
"""
global SUBPROCESS_CLEANUP_HACKED
if not SUBPROCESS_CLEANUP_HACKED and threading.activeCount() != 1:
# Only hack if there is ever multiple threads.
# There is no point to leak with only one thread.
subprocess._cleanup = lambda: None
SUBPROCESS_CLEANUP_HACKED = True
def get_english_env(env):
"""Forces LANG and/or LANGUAGE to be English.
Forces encoding to utf-8 for subprocesses.
Returns None if it is unnecessary.
"""
if sys.platform == 'win32':
return None
env = env or os.environ
# Test if it is necessary at all.
is_english = lambda name: env.get(name, 'en').startswith('en')
if is_english('LANG') and is_english('LANGUAGE'):
return None
# Requires modifications.
env = env.copy()
def fix_lang(name):
if not is_english(name):
env[name] = 'en_US.UTF-8'
fix_lang('LANG')
fix_lang('LANGUAGE')
return env
def Popen(args, **kwargs):
"""Wraps subprocess.Popen() with various workarounds.
Returns a subprocess.Popen object.
- Forces English output since it's easier to parse the stdout if it is always
in English.
- Sets shell=True on windows by default. You can override this by forcing
shell parameter to a value.
- Adds support for VOID to not buffer when not needed.
Note: Popen() can throw OSError when cwd or args[0] doesn't exist. Translate
exceptions generated by cygwin when it fails trying to emulate fork().
"""
# Make sure we hack subprocess if necessary.
hack_subprocess()
add_kill()
env = get_english_env(kwargs.get('env'))
if env:
kwargs['env'] = env
if kwargs.get('shell') is None:
# *Sigh*: Windows needs shell=True, or else it won't search %PATH% for the
# executable, but shell=True makes subprocess on Linux fail when it's called
# with a list because it only tries to execute the first item in the list.
kwargs['shell'] = bool(sys.platform=='win32')
if isinstance(args, basestring):
tmp_str = args
elif isinstance(args, (list, tuple)):
tmp_str = ' '.join(args)
else:
raise CalledProcessError(None, args, kwargs.get('cwd'), None, None)
if kwargs.get('cwd', None):
tmp_str += '; cwd=%s' % kwargs['cwd']
logging.debug(tmp_str)
def fix(stream):
if kwargs.get(stream) in (VOID, os.devnull):
# Replaces VOID with handle to /dev/null.
# Create a temporary file to workaround python's deadlock.
# http://docs.python.org/library/subprocess.html#subprocess.Popen.wait
# When the pipe fills up, it will deadlock this process. Using a real file
# works around that issue.
kwargs[stream] = open(os.devnull, 'w')
if callable(kwargs.get(stream)):
# Callable stdout/stderr should be used only with call() wrappers.
kwargs[stream] = PIPE
fix('stdout')
fix('stderr')
try:
return subprocess.Popen(args, **kwargs)
except OSError, e:
if e.errno == errno.EAGAIN and sys.platform == 'cygwin':
# Convert fork() emulation failure into a CygwinRebaseError().
raise CygwinRebaseError(
e.errno,
args,
kwargs.get('cwd'),
None,
'Visit '
'http://code.google.com/p/chromium/wiki/CygwinDllRemappingFailure to '
'learn how to fix this error; you need to rebase your cygwin dlls')
# Popen() can throw OSError when cwd or args[0] doesn't exist. Let it go
# through
raise
def _queue_pipe_read(pipe, name, done, dest):
"""Queue characters read from a pipe into a queue.
Left outside the _tee_threads function to not introduce a function closure
to speed up variable lookup.
"""
while not done.isSet():
data = pipe.read(1)
if not data:
break
dest.put((name, data))
dest.put(name)
def _tee_threads(proc, timeout, start, stdin, args, kwargs):
"""Does I/O for a process's pipes using thread.
It's the simplest and slowest implementation. Expect very slow behavior.
If there is a callback and it doesn't keep up with the calls, the timeout
effectiveness will be delayed accordingly.
"""
# TODO(maruel): Implement a select based implementation on POSIX and a Windows
# one using WaitForMultipleObjects().
#
# Queue of either of <threadname> when done or (<threadname>, data).
# In theory we would like to limit to ~64kb items to not cause large memory
# usage when the callback blocks. It is not done because it slows down
# processing on OSX10.6 by a factor of 2x, making it even slower than Windows!
# Revisit this decision if it becomes a problem, e.g. crash because of
# memory exhaustion.
queue = Queue.Queue()
done = threading.Event()
def write_stdin():
stdin_io = cStringIO.StringIO(stdin)
while not done.isSet():
data = stdin_io.read(1024)
if data:
proc.stdin.write(data)
else:
proc.stdin.close()
break
queue.put('stdin')
def timeout_fn():
done.wait(timeout)
# No need to close the pipes since killing should be sufficient.
queue.put('timeout')
# Starts up to 4 threads:
# Read stdout
# Read stderr
# Write stdin
# Timeout
threads = {}
if timeout is not None:
threads['timeout'] = threading.Thread(target=timeout_fn)
if callable(kwargs.get('stdout')):
threads['stdout'] = threading.Thread(
target=_queue_pipe_read, args=(proc.stdout, 'stdout', done, queue))
if callable(kwargs.get('stderr')):
threads['stderr'] = threading.Thread(
target=_queue_pipe_read,
args=(proc.stderr, 'stderr', done, queue))
if isinstance(stdin, str):
threads['stdin'] = threading.Thread(target=write_stdin)
for t in threads.itervalues():
t.daemon = True
t.start()
timed_out = False
try:
while proc.returncode is None:
assert threads
proc.poll()
item = queue.get()
if isinstance(item, str):
threads[item].join()
del threads[item]
if item == 'timeout' and not timed_out and proc.poll() is None:
logging.debug('Timed out: killing')
proc.kill()
timed_out = True
if not threads:
# We won't be waken up anymore. Need to busy loop.
break
else:
kwargs[item[0]](item[1])
finally:
# Stop the threads.
done.set()
# Join threads
for thread in threads.itervalues():
thread.join()
# Flush the queue.
try:
while True:
item = queue.get(False)
if isinstance(item, str):
if item == 'timeout':
# TODO(maruel): Does it make sense at that point?
if not timed_out and proc.poll() is None:
logging.debug('Timed out: killing')
proc.kill()
timed_out = True
else:
kwargs[item[0]](item[1])
except Queue.Empty:
pass
# Get the remainder.
if callable(kwargs.get('stdout')):
data = proc.stdout.read()
while data:
kwargs['stdout'](data)
data = proc.stdout.read()
if callable(kwargs.get('stderr')):
data = proc.stderr.read()
while data:
kwargs['stderr'](data)
data = proc.stderr.read()
if proc.returncode is None:
# Usually happens when killed with timeout but not listening to pipes.
proc.wait()
if timed_out:
return TIMED_OUT
return proc.returncode
def communicate(args, timeout=None, **kwargs):
"""Wraps subprocess.Popen().communicate().
Returns ((stdout, stderr), returncode).
- The process will be killed after |timeout| seconds and returncode set to
TIMED_OUT.
- Automatically passes stdin content as input so do not specify stdin=PIPE.
"""
if timeout and kwargs.get('shell'):
raise TypeError(
'Using timeout and shell simultaneously will cause a process leak '
'since the shell will be killed instead of the child process.')
stdin = kwargs.pop('stdin', None)
if stdin is not None:
if stdin is VOID:
kwargs['stdin'] = open(os.devnull, 'r')
stdin = None
else:
assert isinstance(stdin, basestring)
# When stdin is passed as an argument, use it as the actual input data and
# set the Popen() parameter accordingly.
kwargs['stdin'] = PIPE
start = time.time()
proc = Popen(args, **kwargs)
need_buffering = (timeout or
callable(kwargs.get('stdout')) or callable(kwargs.get('stderr')))
if not need_buffering:
# Normal workflow.
if stdin not in (None, VOID):
return proc.communicate(stdin), proc.returncode
else:
return proc.communicate(), proc.returncode
stdout = None
stderr = None
# Convert to a lambda to workaround python's deadlock.
# http://docs.python.org/library/subprocess.html#subprocess.Popen.wait
# When the pipe fills up, it will deadlock this process. Using a thread
# works around that issue. No need for thread safe function since the call
# backs are guaranteed to be called from the main thread.
if kwargs.get('stdout') == PIPE:
stdout = []
kwargs['stdout'] = stdout.append
if kwargs.get('stderr') == PIPE:
stderr = []
kwargs['stderr'] = stderr.append
returncode = _tee_threads(proc, timeout, start, stdin, args, kwargs)
if not stdout is None:
stdout = ''.join(stdout)
if not stderr is None:
stderr = ''.join(stderr)
return (stdout, stderr), returncode
def call(args, **kwargs):
"""Emulates subprocess.call().
Automatically convert stdout=PIPE or stderr=PIPE to VOID.
In no case they can be returned since no code path raises
subprocess2.CalledProcessError.
"""
if kwargs.get('stdout') == PIPE:
kwargs['stdout'] = VOID
if kwargs.get('stderr') == PIPE:
kwargs['stderr'] = VOID
return communicate(args, **kwargs)[1]
def check_call_out(args, **kwargs):
"""Improved version of subprocess.check_call().
Returns (stdout, stderr), unlike subprocess.check_call().
"""
out, returncode = communicate(args, **kwargs)
if returncode:
raise CalledProcessError(
returncode, args, kwargs.get('cwd'), out[0], out[1])
return out
def check_call(args, **kwargs):
"""Emulate subprocess.check_call()."""
check_call_out(args, **kwargs)
return 0
def capture(args, **kwargs):
"""Captures stdout of a process call and returns it.
Returns stdout.
- Discards returncode.
- Blocks stdin by default if not specified since no output will be visible.
"""
kwargs.setdefault('stdin', VOID)
# Like check_output, deny the caller from using stdout arg.
return communicate(args, stdout=PIPE, **kwargs)[0][0]
def check_output(args, **kwargs):
"""Emulates subprocess.check_output().
Captures stdout of a process call and returns stdout only.
- Throws if return code is not 0.
- Works even prior to python 2.7.
- Blocks stdin by default if not specified since no output will be visible.
- As per doc, "The stdout argument is not allowed as it is used internally."
"""
kwargs.setdefault('stdin', VOID)
return check_call_out(args, stdout=PIPE, **kwargs)[0]