
########################################################################################
#                                                                                      #
#   Author: Pierre Tuffery                                                             #
#   Organization:   Ressource Parisienne en Bioinformatique Structurale (RPBS), Paris  #
#   Distributed under GPLv2 Licence. Please refer to the COPYING.LIB document.         #
#                                                                                      #
########################################################################################
"""
Classes executing the command and managing the results  
"""
import os ,os.path
from subprocess import Popen, PIPE
import sys
import time

import logging ,logging.config

import logging
_log = logging.getLogger(__name__)

import Mobyle.JobState
import Mobyle.ConfigManager
import Mobyle.Utils
from Mobyle.Execution.Batch import _Batch

import Mobyle.Net

import Local.Policy
from Mobyle.MobyleError import *


_cfg = Mobyle.ConfigManager.Config()
__extra_epydoc_fields__ = [('call', 'Called by','Called by')]




class PBS(_Batch):
    """
    @author: Pierre Tuffery
    PBS class to work for execution or route queues.

    The job command line is wrapped in a C{batch.sh} script which is handed
    to C{qsub}. As C{qsub} returns as soon as the job is queued, L{run} then
    polls C{qstat} (see L{getStatus}) until the job is no longer reported as
    active.
    """

    # Mobyle status code associated with each state letter read in the 5th
    # column of the qstat listing. The lower case letters are kept for
    # backward compatibility; 'H' (held) and 'W' (waiting) are genuine PBS
    # states which must not be mistaken for an unknown, hence ended, job.
    _PBS2MOBYLE = { 'r' : 3 , #running
                    't' : 3 ,
                    'R' : 3 ,
                    's' : 7 , #hold
                    'S' : 7 ,
                    'T' : 7 ,
                    'h' : 7 ,
                    'H' : 7 ,
                    'w' : 2 , #pending
                    'W' : 2 ,
                    'd' : 6 , #killed
                    'E' : 3 , #'exiting',
                    'C' : 3 , #'exiting',
                    'Q' : 2 , #pending
                    'F' : 4 , #finished # Not used
                    }

    @staticmethod
    def _pbsInit( ):
        """
        To get some PBS local information

        @return: ( pbs_prefix , pbs_env , pbs_defaults ): the directory
        prepended to the PBS command names, the environment variables to set
        for the PBS commands, and the default waiting time and priority, as
        provided by C{_cfg.pbs()}.
        @rtype: tuple ( string , dict , dict )
        """
        PBS_ROOT , PBS_WAIT_TIME , PBS_PRIORITY = _cfg.pbs()
        pbs_env = { 'PBS_ROOT' : PBS_ROOT }
        pbs_defaults = { 'PBS_WAIT_TIME' : PBS_WAIT_TIME ,
                         'PBS_PRIORITY'  : PBS_PRIORITY }
        return ( PBS_ROOT , pbs_env , pbs_defaults )

    @staticmethod
    def _splitKey( pbsKey ):
        """
        Split a key of the form C{jobId@host} as built by L{run}.

        @param pbsKey: the key identifying the job
        @type pbsKey: string
        @return: ( jobId , remote ). jobId is cut before its second dot
        because qstat lists the identifiers in a short form such as
        C{32796.bioserv}. remote is the host part of the key, or an empty
        string when there is none or when the key mentions localhost.
        @rtype: tuple ( string , string )
        """
        parts = pbsKey.split( "@" )
        jobId = parts[ 0 ]
        remote = ""
        if len( parts ) > 1 and "localhost" not in pbsKey:
            remote = parts[ 1 ]
        # the dots must be searched in the job identifier only: the host
        # part of the key has dots of its own
        if jobId.count( "." ) > 1:
            secondDot = jobId.index( "." , jobId.index( "." ) + 1 )
            jobId = jobId[ : secondDot ]
        return ( jobId , remote )

    def _pbsAbort( self , msg ):
        """
        Report msg with C{self._logError} (the user only gets a generic
        message) and stop the submission.

        @param msg: the message for the administrator and the log
        @type msg: string
        @raise MobyleError: always
        """
        self._logError( admMsg = msg ,
                        userMsg = "Mobyle internal server error" ,
                        logMsg = msg )
        raise MobyleError( msg )

    def run(self):
        """
        We build the pbs script and we execute it.
        Unlike SGE, PBS is asynchronous, so we must manage the wait until
        job is finished.

        @return: the status found in the job administration file if it is
        already an ended one, otherwise a "finished" ( code 4 ) status.
        @raise MobyleError: if the process does not run in the job directory,
        if the queue is unknown or of an unsupported type, if the submission
        fails, or if the link in ADMINDIR cannot be created or removed.
        """
        if os.getcwd() != os.path.abspath( self.dirPath ):
            self._pbsAbort( "the child process execute itself in a wrong directory" )

        jobKey = self.getKey()
        thisservicequeue = _cfg.queue( self.serviceName )

        self.queueproperties = _cfg.queueProperties( self.queue )
        if self.queueproperties is None:
            self._pbsAbort( "undefined pbs queue invoked %s" % self.queue )

        # The host part of the job key depends on the queue type. It is
        # resolved before anything is submitted, so that a misconfigured
        # queue cannot leave a job running without any key to follow it.
        queueType = self.queueproperties['type']
        if queueType == "execution":
            pbsHost = "localhost"
        elif queueType == "routage":
            pbsHost = self.queueproperties['remote']
        else:
            self._pbsAbort( "unsupported pbs queue type %s for queue %s" % ( queueType ,
                                                                             self.queue ) )

        try:
            pbs_prefix , pbs_env , pbs_defaults = PBS._pbsInit()
        except MobyleError:
            # sys.exc_info is used instead of the "except ... , err" syntax
            # to remain valid whatever the python version
            err = sys.exc_info()[1]
            self._logError( admMsg = str( err ) ,
                            userMsg = "Mobyle internal server error" ,
                            logMsg = None )
            _log.critical( str( err ) )
            raise

        # We must write a script for remote queuing
        fName = "%s/batch.sh" % os.getcwd()
        self._writeBatchScript( fName , thisservicequeue )

        # The files which will receive the standard output and error of the
        # job are created here; nothing is written in them by this process.
        stdFiles = []
        try:
            for extension in ( ".out" , ".err" ):
                stdFiles.append( open( self.serviceName + extension , 'w' ) )
        except IOError:
            err = sys.exc_info()[1]
            for stdFile in stdFiles:
                stdFile.close()
            self._pbsAbort( "PBS: can't open file for standard job output: "+ str(err) )

        try:
            pbsId = self._qsub( fName , pbs_prefix , pbs_env )

            # make sure the Id contains the information of the host
            pbsKey = pbsId + "@" + pbsHost
            self.pbsKey = pbsKey

            self._adm.setBatch( 'PBS')
            self._adm.setNumber( pbsKey )
            self._adm.commit()

            linkName = "%s/%s.%s" %( _cfg.admindir() ,
                                     self.serviceName ,
                                     jobKey )
            try:
                os.link( os.path.join( self.dirPath , '.admin') , linkName )
            except OSError:
                err = sys.exc_info()[1]
                msg = "can't create link %s in ADMINDIR: %s" %( linkName , err )
                self._logError( admMsg = msg ,
                                userMsg = "Mobyle internal server error" ,
                                logMsg = None )
                _log.critical( "%s/%s : %s" %( self.serviceName , jobKey , msg ) )
                raise MobyleError( msg )

            # Unlike SGE, PBS is asynchronous. We already have the hand.
            # We must perform active wait.
            # The delay is the one of the service queue if any, otherwise
            # the PBS default one.
            try:
                wait_time = thisservicequeue["wait_time"]
            except Exception:
                wait_time = pbs_defaults.get( "PBS_WAIT_TIME" , 5 )
            self._pbswait( wait_time = wait_time )

            try:
                os.unlink( linkName )
            except OSError:
                err = sys.exc_info()[1]
                msg = "can't remove link %s in ADMINDIR: %s" %( linkName , err )
                self._logError( admMsg = msg ,
                                userMsg = "Mobyle internal server error" ,
                                logMsg = None )
                _log.critical( "%s/%s : %s" %( self.serviceName , jobKey , msg ) )
                raise MobyleError( msg )
        finally:
            for stdFile in stdFiles:
                stdFile.close()

        ####################################
        #
        # check how pbs behaves regarding the return codes !!!
        #
        #####################################

        self._adm.refresh()
        oldStatus = self._adm.getStatus()
        if oldStatus.isEnded():
            # an ended status already recorded for this job must be kept
            return oldStatus
        return Mobyle.Utils.Status( code = 4 ) #finished

    def _writeBatchScript( self , fName , thisservicequeue ):
        """
        Write the shell script submitted to qsub: the PBS directives taken
        from the queue properties, then the command line of the job.

        @param fName: the path of the script to write
        @type fName: string
        @param thisservicequeue: the queue settings of the service, as given
        by C{_cfg.queue}. The "nodes" and "ppn" entries are optional.
        """
        workDir = os.getcwd()
        script = open( fName , "w" )
        try:
            script.write("#!/bin/bash\n")
            script.write("#PBS -S %s\n" % self.queueproperties['shell'])
            script.write("#PBS -q %s\n" % self.queue)
            script.write("#PBS -o %s/%s.out\n" % (workDir, self.serviceName))
            script.write("#PBS -e %s/%s.err\n" % (workDir, self.serviceName))
            script.write("#PBS -m n\n")
            script.write("#PBS -p %s\n" % self.queueproperties['priority'])
            script.write("#PBS -u %s\n" % self.queueproperties['uid'])
            script.write("#PBS -N %s\n" % self.serviceName)
            # Only the lookup of the optional settings is protected: a
            # failure to write the script must not be silently ignored.
            try:
                multiProc = ( thisservicequeue["nodes"] , thisservicequeue["ppn"] )
            except Exception:
                multiProc = None
                _log.debug("PBS exception: no multi_proc instructions for service %s\n" % self.serviceName)
            if multiProc is not None:
                script.write("#PBS -l nodes=%s:ppn=%s\n" % multiProc)
            script.write("cd %s\n" % workDir)
            script.write("%s\n" % self.commandLine)
        finally:
            script.close()

    def _qsub( self , fName , pbs_prefix , pbs_env ):
        """
        Submit the script with qsub.

        @param fName: the path of the script to submit
        @type fName: string
        @param pbs_prefix: the directory of the PBS commands
        @type pbs_prefix: string
        @param pbs_env: the PBS variables, added to C{self.xmlEnv}
        @type pbs_env: dict
        @return: the job identifier printed by qsub
        @rtype: string
        @raise MobyleError: if qsub cannot be executed, writes on its
        standard error, exits with a non zero code or prints no identifier.
        """
        cmd = os.path.join( pbs_prefix , 'qsub' ) + ' ' + fName
        self.xmlEnv.update( pbs_env )

        try:
            pipe = Popen( cmd ,
                          shell  = True ,
                          stdout = PIPE  ,
                          stdin  = None  ,
                          stderr = PIPE  ,
                          close_fds = True ,
                          env       = self.xmlEnv
                          )
            # communicate drains both pipes together and waits for qsub:
            # reading them one after the other may block
            out , err = pipe.communicate()
        except OSError:
            exc = sys.exc_info()[1]
            self._pbsAbort( "PBS execution failed: "+ str(exc) )

        if err:
            self._pbsAbort( "PBS execution returned an error: "+ str(err) )
        if pipe.returncode != 0 :
            raise MobyleError( "error in pbs submission status" )

        # We get the pbsId
        words = out.split()
        if not words:
            self._pbsAbort( "PBS execution returned no job identifier" )
        return words[ 0 ]

    def _pbswait(self, wait_time = 5):
        """
        This method will wait for the submitted job (asynchronous) to be completed.
        The job must be seen completed by two checks in a row, 2 seconds
        apart, before the method returns.

        @param wait_time: the delay in seconds between two queries while the
        job is not completed
        """
        counter = 0
        while counter < 2:
            while not self._isCompleted():
                # the job is still there: start the confirmation again
                counter = 0
                time.sleep(wait_time)
            counter += 1
            time.sleep(2) # extra delay for NFS, sync, etc

    def _isCompleted(self):
        """
        This queries the queuing system using qstat to determine if
        the job is completed

        @return: 1 if the status of the job is an ended one or if it cannot
        be retrieved, 0 otherwise.
        @rtype: int
        """
        try:
            if self.getStatus( self.pbsKey ).isEnded():
                return 1
            return 0
        except Exception:
            # a job which cannot be queried is considered as completed;
            # _pbswait asks twice before trusting this answer
            return 1

    @staticmethod
    def getStatus( pbsKey ):
        """
        @param pbsKey: the key of the job, of the form jobId@host
        @type pbsKey: string
        @return: the status of job with number pbsKey, with a message giving
        its rank among the listed jobs when it is pending. A job which is not
        listed by qstat is reported as finished ( code 4 ).
        @rtype: Mobyle.Utils.Status
        @raise MobyleError: if qstat cannot be executed, exits with a non zero
        code, or reports an unknown job.
        @raise KeyError: if the job is listed in a state missing from the
        state table.
        @todo: for best performance, restrict the querying to one queue
        PBS qstat return if completed:
        qstat: Unknown Job Id 19347.bioserv.rpbs.jussieu.fr
        """
        pbs_prefix , pbs_env , pbs_defaults = PBS._pbsInit( )
        jobId , remote = PBS._splitKey( pbsKey )

        qstatMachine = ""
        if remote != "":
            qstatMachine = "@" + remote

        # 2008, P. Tuffery:
        # safer not to specify jobId in qstat
        # better to get full info and search for the jobId
        # qstat (short description) will return info on the form:
        # 32796.bioserv       STDIN            aw3                    0 R Xeon
        pbs_cmd = os.path.join( pbs_prefix , 'qstat' ) + " " + qstatMachine

        try:
            pipe = Popen( pbs_cmd,
                          shell = True ,
                          stdout = PIPE ,
                          stdin = None  ,
                          stderr= PIPE ,
                          close_fds = True ,
                          env= pbs_env
                          )
            # the whole listing can be larger than a pipe buffer: waiting
            # for qstat before reading its output could block forever
            out , err = pipe.communicate()
        except OSError:
            exc = sys.exc_info()[1]
            raise MobyleError( "can't query pbs : %s :%s "%( pbs_cmd , exc ) )
        if pipe.returncode != 0 :
            raise MobyleError( "error in pbs status querying" )
        if err.count("Unknown"):
            raise MobyleError( "no job (1) : " + str( pbsKey ) )

        # Qcount is the number of lines read from the first queued job, this
        # one included; it stays at 0 as long as no queued job has been met.
        Qcount = 0
        for line in out.splitlines():
            fields = line.split()
            if Qcount:
                Qcount += 1
            elif len( fields ) > 4 and fields[ 4 ] == "Q":
                Qcount = 1
            if len( fields ) < 5:
                continue # no state column: not the description of a job
            # The identifier is looked for at the beginning of the first
            # column, otherwise 234.host would also match 1234.host.
            if not fields[ 0 ].startswith( jobId ):
                continue
            status = fields[ 4 ]
            mobyleStatusCode = PBS._PBS2MOBYLE[ status ]
            Qmsg = None
            if Qcount and status == "Q":
                Qmsg = "Pending %d" % Qcount
            return Mobyle.Utils.Status( code = mobyleStatusCode , message = Qmsg )

        # the job is not listed any more
        return Mobyle.Utils.Status( code = 4 , message = None )

    @staticmethod
    def kill( pbsKey ):
        """
        Ask PBS to delete the job with qdel. The output of qdel is sent to
        the standard error and its return code is ignored. The PBS variables
        are added to the environment of the current process.

        @param pbsKey: the key of the job, of the form jobId@host
        @type pbsKey: string
        """
        pbs_prefix , pbs_env , pbs_defaults = PBS._pbsInit( )
        jobId , remote = PBS._splitKey( pbsKey )

        jobHandle = jobId
        if remote != "":
            jobHandle = jobId + "@" + remote

        # NOTE(review): the key is inserted as is in a shell command; it is
        # assumed to be a key built by run() - confirm on the caller side.
        pbs_cmd = "%s %s 1>&2" %( os.path.join( pbs_prefix , 'qdel' ) ,  jobHandle )
        os.environ.update( pbs_env )
        os.system( pbs_cmd )


