changeset 1062:aa7e34e738cf

8042810: hgforest: some shells run read in sub-shell and can't use fifo Reviewed-by: chegar, erikj
author mduigou
date Tue, 13 May 2014 08:42:44 -0700
parents ec58dd8b23b6
children de457bd243b4
files common/bin/hgforest.sh
diffstat 1 files changed, 147 insertions(+), 81 deletions(-) [+]
line wrap: on
line diff
--- a/common/bin/hgforest.sh	Tue Jul 08 11:21:43 2014 -0700
+++ b/common/bin/hgforest.sh	Tue May 13 08:42:44 2014 -0700
@@ -1,5 +1,4 @@
 #!/bin/sh
-
 #
 # Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -23,25 +22,40 @@
 # questions.
 #
 
-# Shell script for a fast parallel forest command
+# Shell script for a fast parallel forest/trees command
 
-global_opts=""
-status_output="/dev/stdout"
-qflag="false"
-vflag="false"
-sflag="false"
+usage() {
+      echo "usage: $0 [-h|--help] [-q|--quiet] [-v|--verbose] [-s|--sequential] [--] <command> [commands...]" > ${status_output}
+      echo "Environment variables which modify behaviour:"
+      echo "   HGFOREST_QUIET      : (boolean) If 'true' then standard output is redirected to /dev/null"
+      echo "   HGFOREST_VERBOSE    : (boolean) If 'true' then Mercurial asked to produce verbose output"
+      echo "   HGFOREST_SEQUENTIAL : (boolean) If 'true' then repos are processed sequentially. Disables concurrency"
+      echo "   HGFOREST_GLOBALOPTS : (string, must begin with space) Additional Mercurial global options"
+      echo "   HGFOREST_REDIRECT   : (file path) Redirect standard output to specified file"
+      echo "   HGFOREST_FIFOS      : (boolean) Default behaviour for FIFO detection. Does not override FIFOs disabled"
+      echo "   HGFOREST_CONCURRENCY: (positive integer) Number of repos to process concurrently"
+      echo "   HGFOREST_DEBUG      : (boolean) If 'true' then temp files are retained"
+      exit 1
+}
+
+global_opts="${HGFOREST_GLOBALOPTS:-}"
+status_output="${HGFOREST_REDIRECT:-/dev/stdout}"
+qflag="${HGFOREST_QUIET:-false}"
+vflag="${HGFOREST_VERBOSE:-false}"
+sflag="${HGFOREST_SEQUENTIAL:-false}"
 while [ $# -gt 0 ]
 do
   case $1 in
+    -h | --help )
+      usage
+      ;;
+
     -q | --quiet )
       qflag="true"
-      global_opts="${global_opts} -q"
-      status_output="/dev/null"
       ;;
 
     -v | --verbose )
       vflag="true"
-      global_opts="${global_opts} -v"
       ;;
 
     -s | --sequential )
@@ -63,39 +77,60 @@
   shift
 done
 
-
-command="$1"; shift
-command_args="$@"
+# silence standard output?
+if [ ${qflag} = "true" ] ; then
+  global_opts="${global_opts} -q"
+  status_output="/dev/null"
+fi
 
-usage() {
-      echo "usage: $0 [-q|--quiet] [-v|--verbose] [-s|--sequential] [--] <command> [commands...]" > ${status_output}
-      exit 1
-}
+# verbose output?
+if [ ${vflag} = "true" ] ; then
+  global_opts="${global_opts} -v"
+fi
 
-if [ "x" = "x$command" ] ; then
+# Make sure we have a command.
+if [ $# -lt 1 -o -z "${1:-}" ] ; then
   echo "ERROR: No command to hg supplied!"
   usage
 fi
 
-# Check if we can use fifos for monitoring sub-process completion.
-on_windows=`uname -s | egrep -ic -e 'cygwin|msys'`
-if [ ${on_windows} = "1" ]; then
-  # cygwin has (2014-04-18) broken (single writer only) FIFOs
-  # msys has (2014-04-18) no FIFOs.
-  have_fifos="false"
-else
-  have_fifos="true"
-fi
+command="$1"; shift
+command_args="${@:-}"
 
 # Clean out the temporary directory that stores the pid files.
 tmp=/tmp/forest.$$
 rm -f -r ${tmp}
 mkdir -p ${tmp}
 
+
+if [ "${HGFOREST_DEBUG:-false}" = "true" ] ; then
+  echo "DEBUG: temp files are in: ${tmp}"
+fi
+
+# Check if we can use fifos for monitoring sub-process completion.
+echo "1" > ${tmp}/read
+while_subshell=1
+while read line; do
+  while_subshell=0
+  break;
+done < ${tmp}/read
+rm ${tmp}/read
+
+on_windows=`uname -s | egrep -ic -e 'cygwin|msys'`
+
+if [ ${while_subshell} = "1" -o ${on_windows} = "1" ]; then
+  # cygwin has (2014-04-18) broken (single writer only) FIFOs
+  # msys has (2014-04-18) no FIFOs.
+  # older shells create a sub-shell for redirect to while
+  have_fifos="false"
+else
+  have_fifos="${HGFOREST_FIFOS:-true}"
+fi
+
 safe_interrupt () {
   if [ -d ${tmp} ]; then
     if [ "`ls ${tmp}/*.pid`" != "" ]; then
-      echo "Waiting for processes ( `cat ${tmp}/*.pid | tr '\n' ' '`) to terminate nicely!" > ${status_output}
+      echo "Waiting for processes ( `cat ${tmp}/.*.pid ${tmp}/*.pid 2> /dev/null | tr '\n' ' '`) to terminate nicely!" > ${status_output}
       sleep 1
       # Pipe stderr to dev/null to silence kill, that complains when trying to kill
       # a subprocess that has already exited.
@@ -110,10 +145,12 @@
 
 nice_exit () {
   if [ -d ${tmp} ]; then
-    if [ "`ls ${tmp}`" != "" ]; then
+    if [ "`ls -A ${tmp} 2> /dev/null`" != "" ]; then
       wait
     fi
-    rm -f -r ${tmp}
+    if [ "${HGFOREST_DEBUG:-false}" != "true" ] ; then
+      rm -f -r ${tmp}
+    fi
   fi
 }
 
@@ -128,17 +165,20 @@
 repos=""
 repos_extra=""
 if [ "${command}" = "clone" -o "${command}" = "fclone" -o "${command}" = "tclone" ] ; then
+  # we must be a clone
   if [ ! -f .hg/hgrc ] ; then
     echo "ERROR: Need initial repository to use this script" > ${status_output}
     exit 1
   fi
 
+  # the clone must know where it came from (have a default pull path).
   pull_default=`hg paths default`
   if [ "${pull_default}" = "" ] ; then
     echo "ERROR: Need initial clone with 'hg paths default' defined" > ${status_output}
     exit 1
   fi
 
+  # determine which sub repos need to be cloned.
   for i in ${subrepos} ; do
     if [ ! -f ${i}/.hg/hgrc ] ; then
       repos="${repos} ${i}"
@@ -147,12 +187,15 @@
 
   pull_default_tail=`echo ${pull_default} | sed -e 's@^.*://[^/]*/\(.*\)@\1@'`
 
-  if [ "${command_args}" != "" ] ; then
+  if [ -n "${command_args}" ] ; then
+    # if there is an "extra sources" path then reparent "extra" repos to that path
     if [ "x${pull_default}" = "x${pull_default_tail}" ] ; then
       echo "ERROR: Need initial clone from non-local source" > ${status_output}
       exit 1
     fi
     pull_extra="${command_args}/${pull_default_tail}"
+
+    # determine which extra subrepos need to be cloned.
     for i in ${subrepos_extra} ; do
       if [ ! -f ${i}/.hg/hgrc ] ; then
         repos_extra="${repos_extra} ${i}"
@@ -160,7 +203,7 @@
     done
   else
     if [ "x${pull_default}" = "x${pull_default_tail}" ] ; then
-      # local source repo. Copy the extras ones that exist there.
+      # local source repo. Clone the "extra" subrepos that exist there.
       for i in ${subrepos_extra} ; do
         if [ -f ${pull_default}/${i}/.hg/hgrc -a ! -f ${i}/.hg/hgrc ] ; then
           # sub-repo there in source but not here
@@ -169,13 +212,17 @@
       done
     fi
   fi
-  at_a_time=2
+
   # Any repos to deal with?
   if [ "${repos}" = "" -a "${repos_extra}" = "" ] ; then
     echo "No repositories to process." > ${status_output}
     exit
   fi
+
+  # Repos to process concurrently. Clone does better with low concurrency.
+  at_a_time="${HGFOREST_CONCURRENCY:-2}"
 else
+  # Process command for all of the present repos
   for i in . ${subrepos} ${subrepos_extra} ; do
     if [ -d ${i}/.hg ] ; then
       repos="${repos} ${i}"
@@ -189,6 +236,7 @@
   fi
 
   # any of the repos locked?
+  locked=""
   for i in ${repos} ; do
     if [ -h ${i}/.hg/store/lock -o -f ${i}/.hg/store/lock ] ; then
       locked="${i} ${locked}"
@@ -198,34 +246,39 @@
     echo "ERROR: These repositories are locked: ${locked}" > ${status_output}
     exit 1
   fi
-  at_a_time=8
+
+  # Repos to process concurrently.
+  at_a_time="${HGFOREST_CONCURRENCY:-8}"
 fi
 
 # Echo out what repositories we do a command on.
 echo "# Repositories: ${repos} ${repos_extra}" > ${status_output}
 
 if [ "${command}" = "serve" ] ; then
-  # "serve" is run for all the repos.
+  # "serve" is run for all the repos as one command.
   (
     (
+      cwd=`pwd`
+      serving=`basename ${cwd}`
       (
         echo "[web]"
-        echo "description = $(basename $(pwd))"
+        echo "description = ${serving}"
         echo "allow_push = *"
         echo "push_ssl = False"
 
         echo "[paths]"
-        for i in ${repos} ${repos_extra} ; do
+        for i in ${repos} ; do
           if [ "${i}" != "." ] ; then
-            echo "/$(basename $(pwd))/${i} = ${i}"
+            echo "/${serving}/${i} = ${i}"
           else
-            echo "/$(basename $(pwd)) = $(pwd)"
+            echo "/${serving} = ${cwd}"
           fi
         done
       ) > ${tmp}/serve.web-conf
 
-      echo "serving root repo $(basename $(pwd))"
+      echo "serving root repo ${serving}" > ${status_output}
 
+      echo "hg${global_opts} serve" > ${status_output}
       (PYTHONUNBUFFERED=true hg${global_opts} serve -A ${status_output} -E ${status_output} --pid-file ${tmp}/serve.pid --web-conf ${tmp}/serve.web-conf; echo "$?" > ${tmp}/serve.pid.rc ) 2>&1 &
     ) 2>&1 | sed -e "s@^@serve:   @" > ${status_output}
   ) &
@@ -234,81 +287,93 @@
 
   # n is the number of subprocess started or which might still be running.
   n=0
-  if [ $have_fifos = "true" ]; then
+  if [ ${have_fifos} = "true" ]; then
     # if we have fifos use them to detect command completion.
     mkfifo ${tmp}/fifo
     exec 3<>${tmp}/fifo
-    if [ "${sflag}" = "true" ] ; then
-      # force sequential
-      at_a_time=1
-    fi
   fi
 
+  # iterate over all of the subrepos.
   for i in ${repos} ${repos_extra} ; do
     n=`expr ${n} '+' 1`
     repopidfile=`echo ${i} | sed -e 's@./@@' -e 's@/@_@g'`
     reponame=`echo ${i} | sed -e :a -e 's/^.\{1,20\}$/ &/;ta'`
     pull_base="${pull_default}"
-    for j in $repos_extra ; do
-      if [ "$i" = "$j" ] ; then
-          pull_base="${pull_extra}"
+
+    # regular repo or "extra" repo?
+    for j in ${repos_extra} ; do
+      if [ "${i}" = "${j}" ] ; then
+        # it's an "extra"
+        pull_base="${pull_extra}"
       fi
     done
+
+    # remove trailing slash
     pull_base="`echo ${pull_base} | sed -e 's@[/]*$@@'`"
+
+    # execute the command on the subrepo
     (
       (
         if [ "${command}" = "clone" -o "${command}" = "fclone" -o "${command}" = "tclone" ] ; then
-          pull_newrepo="${pull_base}/${i}"
-          path="`dirname ${i}`"
-          if [ "${path}" != "." ] ; then
+          # some form of clone
+          clone_newrepo="${pull_base}/${i}"
+          parent_path="`dirname ${i}`"
+          if [ "${parent_path}" != "." ] ; then
             times=0
-            while [ ! -d "${path}" ]   ## nested repo, ensure containing dir exists
-            do
-              times=`expr ${times} '+' 1`
+            while [ ! -d "${parent_path}" ] ; do  ## nested repo, ensure containing dir exists
+              if [ "${sflag}" = "true" ] ; then
+                # Missing parent is fatal during sequential operation.
+                echo "ERROR: Missing parent path: ${parent_path}" > ${status_output}
+                exit 1
+              fi
+              times=`expr ${times} '+' 1)`
               if [ `expr ${times} '%' 10` -eq 0 ] ; then
-                echo "${path} still not created, waiting..." > ${status_output}
+                echo "${parent_path} still not created, waiting..." > ${status_output}
               fi
               sleep 5
             done
           fi
-          echo "hg${global_opts} clone ${pull_newrepo} ${i}" > ${status_output}
-          (PYTHONUNBUFFERED=true hg${global_opts} clone ${pull_newrepo} ${i}; echo "$?" > ${tmp}/${repopidfile}.pid.rc ) 2>&1 &
+          # run the clone command.
+          echo "hg${global_opts} clone ${clone_newrepo} ${i}" > ${status_output}
+          (PYTHONUNBUFFERED=true hg${global_opts} clone ${clone_newrepo} ${i}; echo "$?" > ${tmp}/${repopidfile}.pid.rc ) 2>&1 &
         else
+          # run the command.
           echo "cd ${i} && hg${global_opts} ${command} ${command_args}" > ${status_output}
           cd ${i} && (PYTHONUNBUFFERED=true hg${global_opts} ${command} ${command_args}; echo "$?" > ${tmp}/${repopidfile}.pid.rc ) 2>&1 &
         fi
 
         echo $! > ${tmp}/${repopidfile}.pid
       ) 2>&1 | sed -e "s@^@${reponame}:   @" > ${status_output}
-      if [ $have_fifos = "true" ]; then
-        echo "${reponame}" >&3
+      # tell the fifo waiter that this subprocess is done.
+      if [ ${have_fifos} = "true" ]; then
+        echo "${i}" >&3
       fi
     ) &
 
-    if [ $have_fifos = "true" ]; then
-      # check on count of running subprocesses and possibly wait for completion
-      if [ ${at_a_time} -lt ${n} ] ; then
-        # read will block until there are completed subprocesses
-        while read repo_done; do
-          n=`expr ${n} '-' 1`
-          if [ ${n} -lt ${at_a_time} ] ; then
-            # we should start more subprocesses
-            break;
-          fi
-        done <&3
-      fi
+    if [ "${sflag}" = "true" ] ; then
+      # complete this task before starting another.
+      wait
     else
-      if [ "${sflag}" = "false" ] ; then
+      if [ "${have_fifos}" = "true" ]; then
+        # check on count of running subprocesses and possibly wait for completion
+        if [ ${n} -ge ${at_a_time} ] ; then
+          # read will block until there are completed subprocesses
+          while read repo_done; do
+            n=`expr ${n} '-' 1`
+            if [ ${n} -lt ${at_a_time} ] ; then
+              # we should start more subprocesses
+              break;
+            fi
+          done <&3
+        fi
+      else
         # Compare completions to starts
-        completed="`(ls -1 ${tmp}/*.pid.rc 2> /dev/null | wc -l) || echo 0`"
-        while [ ${at_a_time} -lt `expr ${n} '-' ${completed}` ] ; do
+        completed="`(ls -a1 ${tmp}/*.pid.rc 2> /dev/null | wc -l) || echo 0`"
+        while [ `expr ${n} '-' ${completed}` -ge ${at_a_time} ] ; do
           # sleep a short time to give time for something to complete
           sleep 1
-          completed="`(ls -1 ${tmp}/*.pid.rc 2> /dev/null | wc -l) || echo 0`"
+          completed="`(ls -a1 ${tmp}/*.pid.rc 2> /dev/null | wc -l) || echo 0`"
         done
-      else
-        # complete this task before starting another.
-        wait
       fi
     fi
   done
@@ -320,11 +385,12 @@
 # Terminate with exit 0 only if all subprocesses were successful
 ec=0
 if [ -d ${tmp} ]; then
-  for rc in ${tmp}/*.pid.rc ; do
+  rcfiles="`(ls -a ${tmp}/*.pid.rc 2> /dev/null) || echo ''`"
+  for rc in ${rcfiles} ; do
     exit_code=`cat ${rc} | tr -d ' \n\r'`
     if [ "${exit_code}" != "0" ] ; then
-      repo="`echo ${rc} | sed -e s@^${tmp}@@ -e 's@/*\([^/]*\)\.pid\.rc$@\1@' -e 's@_@/@g'`"
-      echo "WARNING: ${repo} exited abnormally ($exit_code)" > ${status_output}
+      repo="`echo ${rc} | sed -e 's@^'${tmp}'@@' -e 's@/*\([^/]*\)\.pid\.rc$@\1@' -e 's@_@/@g'`"
+      echo "WARNING: ${repo} exited abnormally (${exit_code})" > ${status_output}
       ec=1
     fi
   done