From: Guillaume Pellerin Date: Mon, 25 Nov 2013 11:18:27 +0000 (+0100) Subject: " " X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=94f2ea80e0dc7602d5014946682a3598b6a2944f;p=gix.git " " --- diff --git a/apps/git-python/git/cmd.py b/apps/git-python/git/cmd.py new file mode 100644 index 0000000..576a530 --- /dev/null +++ b/apps/git-python/git/cmd.py @@ -0,0 +1,574 @@ +# cmd.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import os, sys +from util import ( + LazyMixin, + stream_copy + ) +from exc import GitCommandError + +from subprocess import ( + call, + Popen, + PIPE + ) + +execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', + 'with_exceptions', 'as_process', + 'output_stream' ) + +__all__ = ('Git', ) + +def dashify(string): + return string.replace('_', '-') + + +class Git(LazyMixin): + """ + The Git class manages communication with the Git binary. + + It provides a convenient interface to calling the Git binary, such as in:: + + g = Git( git_dir ) + g.init() # calls 'git init' program + rval = g.ls_files() # calls 'git ls-files' program + + ``Debugging`` + Set the GIT_PYTHON_TRACE environment variable print each invocation + of the command to stdout. + Set its value to 'full' to see details about the returned values. + """ + __slots__ = ("_working_dir", "cat_file_all", "cat_file_header", "_version_info") + + # CONFIGURATION + # The size in bytes read from stdout when copying git's output to another stream + max_chunk_size = 1024*64 + + git_exec_name = "git" # default that should work on linux and windows + git_exec_name_win = "git.cmd" # alternate command name, windows only + + # Enables debugging of GitPython's git commands + GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) + + # Provide the full path to the git executable. Otherwise it assumes git is in the path + _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE" + GIT_PYTHON_GIT_EXECUTABLE = os.environ.get(_git_exec_env_var, git_exec_name) + + + class AutoInterrupt(object): + """Kill/Interrupt the stored process instance once this instance goes out of scope. It is + used to prevent processes piling up in case iterators stop reading. + Besides all attributes are wired through to the contained process object. + + The wait method was overridden to perform automatic status code checking + and possibly raise.""" + __slots__= ("proc", "args") + + def __init__(self, proc, args ): + self.proc = proc + self.args = args + + def __del__(self): + # did the process finish already so we have a return code ? + if self.proc.poll() is not None: + return + + # can be that nothing really exists anymore ... + if os is None: + return + + # try to kill it + try: + os.kill(self.proc.pid, 2) # interrupt signal + except AttributeError: + # try windows + # for some reason, providing None for stdout/stderr still prints something. This is why + # we simply use the shell and redirect to nul. Its slower than CreateProcess, question + # is whether we really want to see all these messages. Its annoying no matter what. + call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) + # END exception handling + + def __getattr__(self, attr): + return getattr(self.proc, attr) + + def wait(self): + """Wait for the process and return its status code. + + :raise GitCommandError: if the return status is not 0""" + status = self.proc.wait() + if status != 0: + raise GitCommandError(self.args, status, self.proc.stderr.read()) + # END status handling + return status + # END auto interrupt + + class CatFileContentStream(object): + """Object representing a sized read-only stream returning the contents of + an object. + It behaves like a stream, but counts the data read and simulates an empty + stream once our sized content region is empty. + If not all data is read to the end of the objects's lifetime, we read the + rest to assure the underlying stream continues to work""" + + __slots__ = ('_stream', '_nbr', '_size') + + def __init__(self, size, stream): + self._stream = stream + self._size = size + self._nbr = 0 # num bytes read + + # special case: if the object is empty, has null bytes, get the + # final newline right away. + if size == 0: + stream.read(1) + # END handle empty streams + + def read(self, size=-1): + bytes_left = self._size - self._nbr + if bytes_left == 0: + return '' + if size > -1: + # assure we don't try to read past our limit + size = min(bytes_left, size) + else: + # they try to read all, make sure its not more than what remains + size = bytes_left + # END check early depletion + data = self._stream.read(size) + self._nbr += len(data) + + # check for depletion, read our final byte to make the stream usable by others + if self._size - self._nbr == 0: + self._stream.read(1) # final newline + # END finish reading + return data + + def readline(self, size=-1): + if self._nbr == self._size: + return '' + + # clamp size to lowest allowed value + bytes_left = self._size - self._nbr + if size > -1: + size = min(bytes_left, size) + else: + size = bytes_left + # END handle size + + data = self._stream.readline(size) + self._nbr += len(data) + + # handle final byte + if self._size - self._nbr == 0: + self._stream.read(1) + # END finish reading + + return data + + def readlines(self, size=-1): + if self._nbr == self._size: + return list() + + # leave all additional logic to our readline method, we just check the size + out = list() + nbr = 0 + while True: + line = self.readline() + if not line: + break + out.append(line) + if size > -1: + nbr += len(line) + if nbr > size: + break + # END handle size constraint + # END readline loop + return out + + def __iter__(self): + return self + + def next(self): + line = self.readline() + if not line: + raise StopIteration + return line + + def __del__(self): + bytes_left = self._size - self._nbr + if bytes_left: + # read and discard - seeking is impossible within a stream + # includes terminating newline + self._stream.read(bytes_left + 1) + # END handle incomplete read + + + def __init__(self, working_dir=None): + """Initialize this instance with: + + :param working_dir: + Git directory we should work in. If None, we always work in the current + directory as returned by os.getcwd(). + It is meant to be the working tree directory if available, or the + .git directory in case of bare repositories.""" + super(Git, self).__init__() + self._working_dir = working_dir + + # cached command slots + self.cat_file_header = None + self.cat_file_all = None + + def __getattr__(self, name): + """A convenience method as it allows to call the command as if it was + an object. + :return: Callable object that will execute call _call_process with your arguments.""" + if name[0] == '_': + return LazyMixin.__getattr__(self, name) + return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) + + def _set_cache_(self, attr): + if attr == '_version_info': + # We only use the first 4 numbers, as everthing else could be strings in fact (on windows) + version_numbers = self._call_process('version').split(' ')[2] + self._version_info = tuple(int(n) for n in version_numbers.split('.')[:4]) + else: + super(Git, self)._set_cache_(attr) + #END handle version info + + + @property + def working_dir(self): + """:return: Git directory we are working on""" + return self._working_dir + + @property + def version_info(self): + """ + :return: tuple(int, int, int, int) tuple with integers representing the major, minor + and additional version numbers as parsed from git version. + This value is generated on demand and is cached""" + return self._version_info + + def execute(self, command, + istream=None, + with_keep_cwd=False, + with_extended_output=False, + with_exceptions=True, + as_process=False, + output_stream=None, + **subprocess_kwargs + ): + """Handles executing the command on the shell and consumes and returns + the returned information (stdout) + + :param command: + The command argument list to execute. + It should be a string, or a sequence of program arguments. The + program to execute is the first item in the args sequence or string. + + :param istream: + Standard input filehandle passed to subprocess.Popen. + + :param with_keep_cwd: + Whether to use the current working directory from os.getcwd(). + The cmd otherwise uses its own working_dir that it has been initialized + with if possible. + + :param with_extended_output: + Whether to return a (status, stdout, stderr) tuple. + + :param with_exceptions: + Whether to raise an exception when git returns a non-zero status. + + :param as_process: + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output and + with_exceptions ineffective - the caller will have + to deal with the details himself. + It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. + + :param output_stream: + If set to a file-like object, data produced by the git command will be + output to the given stream directly. + This feature only has any effect if as_process is False. Processes will + always be created with a pipe due to issues with subprocess. + This merely is a workaround as data will be copied from the + output pipe to the given output stream directly. + + :param subprocess_kwargs: + Keyword arguments to be passed to subprocess.Popen. Please note that + some of the valid kwargs are already set by this method, the ones you + specify may not be the same ones. + + :return: + * str(output) if extended_output = False (Default) + * tuple(int(status), str(stdout), str(stderr)) if extended_output = True + + if ouput_stream is True, the stdout value will be your output stream: + * output_stream if extended_output = False + * tuple(int(status), output_stream, str(stderr)) if extended_output = True + + :raise GitCommandError: + + :note: + If you add additional keyword arguments to the signature of this method, + you must update the execute_kwargs tuple housed in this module.""" + if self.GIT_PYTHON_TRACE and not self.GIT_PYTHON_TRACE == 'full': + print ' '.join(command) + + # Allow the user to have the command executed in their working dir. + if with_keep_cwd or self._working_dir is None: + cwd = os.getcwd() + else: + cwd=self._working_dir + + # Start the process + proc = Popen(command, + cwd=cwd, + stdin=istream, + stderr=PIPE, + stdout=PIPE, + close_fds=(os.name=='posix'),# unsupported on linux + **subprocess_kwargs + ) + if as_process: + return self.AutoInterrupt(proc, command) + + # Wait for the process to return + status = 0 + stdout_value = '' + stderr_value = '' + try: + if output_stream is None: + stdout_value, stderr_value = proc.communicate() + # strip trailing "\n" + if stdout_value.endswith("\n"): + stdout_value = stdout_value[:-1] + if stderr_value.endswith("\n"): + stderr_value = stderr_value[:-1] + status = proc.returncode + else: + stream_copy(proc.stdout, output_stream, self.max_chunk_size) + stdout_value = output_stream + stderr_value = proc.stderr.read() + # strip trailing "\n" + if stderr_value.endswith("\n"): + stderr_value = stderr_value[:-1] + status = proc.wait() + # END stdout handling + finally: + proc.stdout.close() + proc.stderr.close() + + if self.GIT_PYTHON_TRACE == 'full': + cmdstr = " ".join(command) + if stderr_value: + print "%s -> %d; stdout: '%s'; stderr: '%s'" % (cmdstr, status, stdout_value, stderr_value) + elif stdout_value: + print "%s -> %d; stdout: '%s'" % (cmdstr, status, stdout_value) + else: + print "%s -> %d" % (cmdstr, status) + # END handle debug printing + + if with_exceptions and status != 0: + raise GitCommandError(command, status, stderr_value) + + # Allow access to the command's status code + if with_extended_output: + return (status, stdout_value, stderr_value) + else: + return stdout_value + + def transform_kwargs(self, **kwargs): + """Transforms Python style kwargs into git command line options.""" + args = list() + for k, v in kwargs.items(): + if len(k) == 1: + if v is True: + args.append("-%s" % k) + elif type(v) is not bool: + args.append("-%s%s" % (k, v)) + else: + if v is True: + args.append("--%s" % dashify(k)) + elif type(v) is not bool: + args.append("--%s=%s" % (dashify(k), v)) + return args + + @classmethod + def __unpack_args(cls, arg_list): + if not isinstance(arg_list, (list,tuple)): + return [ str(arg_list) ] + + outlist = list() + for arg in arg_list: + if isinstance(arg_list, (list, tuple)): + outlist.extend(cls.__unpack_args( arg )) + # END recursion + else: + outlist.append(str(arg)) + # END for each arg + return outlist + + def _call_process(self, method, *args, **kwargs): + """Run the given git command with the specified arguments and return + the result as a String + + :param method: + is the command. Contained "_" characters will be converted to dashes, + such as in 'ls_files' to call 'ls-files'. + + :param args: + is the list of arguments. If None is included, it will be pruned. + This allows your commands to call git more conveniently as None + is realized as non-existent + + :param kwargs: + is a dict of keyword arguments. + This function accepts the same optional keyword arguments + as execute(). + + ``Examples``:: + git.rev_list('master', max_count=10, header=True) + + :return: Same as ``execute``""" + # Handle optional arguments prior to calling transform_kwargs + # otherwise these'll end up in args, which is bad. + _kwargs = dict() + for kwarg in execute_kwargs: + try: + _kwargs[kwarg] = kwargs.pop(kwarg) + except KeyError: + pass + + # Prepare the argument list + opt_args = self.transform_kwargs(**kwargs) + + ext_args = self.__unpack_args([a for a in args if a is not None]) + args = opt_args + ext_args + + def make_call(): + call = [self.GIT_PYTHON_GIT_EXECUTABLE, dashify(method)] + call.extend(args) + return call + #END utility to recreate call after changes + + if sys.platform == 'win32': + try: + try: + return self.execute(make_call(), **_kwargs) + except WindowsError: + # did we switch to git.cmd already, or was it changed from default ? permanently fail + if self.GIT_PYTHON_GIT_EXECUTABLE != self.git_exec_name: + raise + #END handle overridden variable + type(self).GIT_PYTHON_GIT_EXECUTABLE = self.git_exec_name_win + call = [self.GIT_PYTHON_GIT_EXECUTABLE] + list(args) + + try: + return self.execute(make_call(), **_kwargs) + finally: + import warnings + msg = "WARNING: Automatically switched to use git.cmd as git executable, which reduces performance by ~70%." + msg += "Its recommended to put git.exe into the PATH or to set the %s environment variable to the executable's location" % self._git_exec_env_var + warnings.warn(msg) + #END print of warning + #END catch first failure + except WindowsError: + raise WindowsError("The system cannot find or execute the file at %r" % self.GIT_PYTHON_GIT_EXECUTABLE) + #END provide better error message + else: + return self.execute(make_call(), **_kwargs) + #END handle windows default installation + + def _parse_object_header(self, header_line): + """ + :param header_line: + type_string size_as_int + + :return: (hex_sha, type_string, size_as_int) + + :raise ValueError: if the header contains indication for an error due to + incorrect input sha""" + tokens = header_line.split() + if len(tokens) != 3: + if not tokens: + raise ValueError("SHA could not be resolved, git returned: %r" % (header_line.strip())) + else: + raise ValueError("SHA %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip())) + # END handle actual return value + # END error handling + + if len(tokens[0]) != 40: + raise ValueError("Failed to parse header: %r" % header_line) + return (tokens[0], tokens[1], int(tokens[2])) + + def __prepare_ref(self, ref): + # required for command to separate refs on stdin + refstr = str(ref) # could be ref-object + if refstr.endswith("\n"): + return refstr + return refstr + "\n" + + def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): + cur_val = getattr(self, attr_name) + if cur_val is not None: + return cur_val + + options = { "istream" : PIPE, "as_process" : True } + options.update( kwargs ) + + cmd = self._call_process( cmd_name, *args, **options ) + setattr(self, attr_name, cmd ) + return cmd + + def __get_object_header(self, cmd, ref): + cmd.stdin.write(self.__prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + + def get_object_header(self, ref): + """ Use this method to quickly examine the type and size of the object behind + the given ref. + + :note: The method will only suffer from the costs of command invocation + once and reuses the command in subsequent calls. + + :return: (hexsha, type_string, size_as_int)""" + cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + return self.__get_object_header(cmd, ref) + + def get_object_data(self, ref): + """ As get_object_header, but returns object data as well + :return: (hexsha, type_string, size_as_int,data_string) + :note: not threadsafe""" + hexsha, typename, size, stream = self.stream_object_data(ref) + data = stream.read(size) + del(stream) + return (hexsha, typename, size, data) + + def stream_object_data(self, ref): + """As get_object_header, but returns the data as a stream + :return: (hexsha, type_string, size_as_int, stream) + :note: This method is not threadsafe, you need one independent Command instance + per thread to be safe !""" + cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + hexsha, typename, size = self.__get_object_header(cmd, ref) + return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout)) + + def clear_cache(self): + """Clear all kinds of internal caches to release resources. + + Currently persistent commands will be interrupted. + + :return: self""" + self.cat_file_all = None + self.cat_file_header = None + return self