#! /usr/bin/env python

#############################################################
#                                                           #
#   Author: Sandrine Larroude ,Bertrand Neron               #
#   Organization:'Biological Software and Databases' Group, #
#                Institut Pasteur, Paris.                   #
#   Distributed under GPLv2 Licence. Please refer to the    #
#   COPYING.LIB document.                                   #
#                                                           #
#############################################################

import os , sys
# The Mobyle libraries are located relatively to the MOBYLEHOME environment
# variable , which is thus mandatory to run this script.
# IDIOM FIX: os.environ.get replaces the deprecated has_key call ( same
# behaviour : the value if the variable is set , None otherwise ).
MOBYLEHOME = os.environ.get( 'MOBYLEHOME' )
if not MOBYLEHOME:
    sys.exit( 'MOBYLEHOME must be defined in your environment' )
if ( os.path.join( MOBYLEHOME , 'Src' ) ) not in sys.path:
    sys.path.append( os.path.join( MOBYLEHOME , 'Src' ) )
        
    
from time import time
from shutil import rmtree

from Mobyle.ConfigManager import Config
config = Config()

from Mobyle.Session import Session
from Mobyle.MobyleError import MobyleError, SessionError ,URLError, HTTPError, JobError
from Mobyle.Admin import Admin
from  Mobyle.Utils import isExecuting
from Mobyle.JobState import JobState


def day2second( day ):
    """
    Convert a duration expressed in days into seconds.

    @param day: the number of days to convert
    @type day: integer or float
    @return: the duration in seconds ( truncated toward zero )
    @rtype: integer
    """
    seconds_per_day = 24 * 60 * 60
    return int( seconds_per_day * day )


def clean_jobs( config , start_time , delay , logger , dry_run = False ):
    """
    remove job directory if job is finished and older than the delay.
    @param config: the Mobyle Configuration    
    @type config: L{Config} instance
    @param start_time: a time in seconds which represent the date of the beginig of cleaning since Epoch.
    @type start_time: foat
    @param delay: the delay in days to remove sessons older than start_time + delay
    @type delay: float    
    @param logger: the logger to log informations
    @type logger: logging.logger instance
    @param dry_run: 
    @type dry_run: boolean
    """
    delay_sec = day2second( delay )
    jobs_repository = config.results_path()
    if not os.path.isdir( jobs_repository ):
        logger.critical( "Check your Mobyle configuration file, the jobs repository :'%s' does not exist")
        sys.exit(1)
    try:
        service_names = os.listdir( jobs_repository )
        service_names.remove( 'ADMINDIR' )
    except Exception, err:
        logger.critical( "The jobs directory is not accessible: %s" %err )
        sys.exit(1) 
    for service_name in service_names:
        logger.info( "-------- cleanning %s --------" % service_name)
        service_path = os.path.join( jobs_repository ,  service_name )
        try:
            jobs_keys = os.listdir( service_path )
        except Exception, err:
            logger.error( "The jobs directory for service %s is not accessible: %s" %( service_name , err ) )
            continue
        
        for job_key in jobs_keys:
            logger.info( "cleaning job %s" %job_key )
            job_path = os.path.join( service_path , job_key )
            if not os.path.isdir( job_path ):
                logger.debug( "%s is not a directory." %job_path )
                continue
            try:
                admin = Admin( job_path )
            except MobyleError , err:
                logger.error( "cannot remove the job : '%s' : %s" %( job_path , err ) )    
            
            job_status = admin.getStatus()
            if not job_status.isKnown():
                logger.warning( "Unkown status for job '%s'"% job_path )
                continue
            is_in_admindir = os.access( os.path.join( jobs_repository, 'ADMINDIR', "%s.%s" %( service_name , job_key) )
                                        , os.F_OK)
            last_modification_time = os.path.getmtime( job_path )
            old_job = int( start_time - last_modification_time ) > delay_sec
            if job_status.isEnded() and old_job :
                logger.debug( "the job is ended and old ( %d > %d )"%( int( start_time - last_modification_time ) , delay_sec) )
                if is_in_admindir:
                    logger.error("the job %s has the status %s and is still in ADMINDIR." %( job_path , job_status ) )
                else:
                    workflowID = admin.getWorkflowID()
                    logger.debug( "workflowID = %s "%workflowID )
                    if workflowID:
                        logger.debug( "the job %s belongs to the workflow %s" %(job_path ,  workflowID ))
                        try:
                            job_state = JobState( uri = workflowID )
                        except URLError , err :
                            #the portal is down?
                            logger.warning( "The job %s belongs to the workflow %s and the portal does respond (%s). The job is not removed."% (job_path , 
                                                                                                                                                workflowID , 
                                                                                                                                                err))
                        except HTTPError , err:
                            #the job does not exists anymore
                            if err.code == 404:
                                logger.info( "The job %s belongs to the workflow %s which is not exist any more (%s). The job is removed." % (job_path ,
                                                                                                                                              workflowID , 
                                                                                                                                              err))
                                if not dry_run:
                                    try:
                                        rmtree( job_path )
                                    except Exception, err:
                                        logger.error( "cannot remove job %s : %s ." %(job_path , err ) )
                            else:
                                logger.info( "The job %s belongs to the workflow %s which is not reachable (%s). The job is not removed." % (job_path , 
                                                                                                                                             workflowID , 
                                                                                                                                             err))
                        except JobError , err:
                            from errno import ENOENT
                            if err.errno == ENOENT:
                                logger.debug( "The job %s belongs to the workflow %s. which is not exist any more. The job is removed."% (job_path , 
                                                                                                                                          workflowID 
                                                                                                                                          ))
                                if not dry_run:
                                    try:
                                        rmtree( job_path )
                                    except Exception, err:
                                        logger.error( "cannot remove job %s : %s ." %(job_path , err ) )
                            else:
                                logger.error( "the workflow job %s cannot be loaded: %s : the job %s is not removed."%( workflowID ,
                                                                                                                     err ,
                                                                                                                     job_path
                                                                                                                    ) )
                        except Exception ,err:
                            logger.error( "an error occured during %s workflow job loading: %s : the job %s is not removed."%( workflowID ,
                                                                                                                               err ,
                                                                                                                               job_path
                                                                                                                               ) )
                        else:
                            workflow_status = job_state.getStatus()
                            logger.debug( "the job %s belongs to the workflow %s which has %s status. The job is not removed."%( job_path , 
                                                                                                                                 workflowID , 
                                                                                                                                 workflow_status))
                    else:
                        if not dry_run:
                            try:
                                rmtree( job_path )
                            except Exception, err:
                                logger.error( "cannot remove job %s : %s ." %(job_path , err ) )
            elif job_status.isQueryable() and old_job:
                if not is_in_admindir:
                    logger.error("The job %s has the status '%s' since more than %f days and is not anymore in ADMINDIR." % ( job_path, job_status, delay))
                else:
                    try:
                        if not isExecuting( job_path ):
                            logger.error( "The job %s has the status '%s' even if it is not executing." % ( job_path, job_status ) )
                    except MobyleError ,err:
                        logger.error( "Probblem during quering the satus of the job %s: %s" %( job_path ,err ) )
            elif old_job :
                logger.error( "The job '%s' has the %s status since more than the delay" % (job_path, job_status) )
            else:
                logger.info( "The job '%s' is too young to be cleaned" %job_path )

            
def clean_sessions( config , start_time , delay , logger , dry_run = False ):
    """
    remove annonymous sessions if the sessions does not point toward any jobs
    @param config: the Mobyle Configuration    
    @type config: L{Config} instance
    @param start_time: a time in seconds which represent the date of the beginig of cleaning since Epoch.
    @type start_time: foat
    @param delay: the delay in days to remove sessons older than start_time + delay
    @type delay: float    
    @param logger: the logger to log informations
    @type logger: logging.logger instance
    @param dry_run: 
    @type dry_run: boolean
    """
    delay = day2second( delay )
    sessions_repository = os.path.join( config.user_sessions_path() , 'anonymous' )
    if not os.path.isdir( sessions_repository ):
        logger.critical( "Check your Mobyle configuration file, the annonymous sessions directory:'%s' does not exist" %sessions_repository)
        sys.exit(1)
    try:
        sessions_keys = os.listdir( sessions_repository )
    except Exception, err:
        logger.critical( "The anonymous sessions directory is not accessible: %s" %err )
        sys.exit(1)                 
    for session_key in sessions_keys :
        session_path = os.path.join( sessions_repository , session_key )
        if not os.path.isdir( session_path ):
            logger.debug( "%s is not a directory." %session_path)
            continue
        last_modification_time = os.path.getmtime( session_path )
        session = Session( session_path , session_key , config )
        try:
            jobs = session.getAllJobs()
            if not jobs and int( start_time - last_modification_time ) > delay :
                logger.info("removing session %s" %session_path)
                if not dry_run:
                    try:
                        rmtree( session_path )
                    except Exception , err:
                        logger.error( "cannot remove the session %s : %s" %( session_path , err ))
                        continue
        except SessionError, err:  
            if not os.access( os.path.join( session_path,'.session.xml'), os.F_OK):
                logger.warning( "no .session.xml in the session %s , remove it anyway" % session_path )
                try:
                    rmtree( session_path )
                except Exception , err:
                    logger.error( "cannot remove the session %s : %s" %( session_path , err ))
                    continue    
            else:
                logger.error( "Error during session %s loading: %s" % ( session_path , err ) )
                continue
        
        
        
        
        
if __name__ == "__main__":
    
    now = time()
    from optparse import OptionParser
    

    parser = OptionParser( )
    parser.add_option( "-j" , "--jobs",
                       action="store_true", 
                       dest = "jobs",
                       default = False ,
                       help= "Clean jobs (programs and workflows).")
    
    parser.add_option( "-s" , "--sessions",
                       action="store_true", 
                       dest = "sessions",
                       default = False ,
                       help= "Clean anonymous sessions.")
    
    parser.add_option( "-d" , "--delay",
                       action="store", 
                       type = 'int', 
                       dest = "delay",
                       default = config.remainResults() ,
                       help= "Delete jobs/sessions older than <DELAY> days ( positive integer value ).")
    
    parser.add_option( "-l" , "--log",
                       action="store", 
                       type = 'string', 
                       dest = "log_file",
                       help= "Path to the Logfile where put the logs.")
    
    parser.add_option( "-n" , "--dry-run",
                       action="store_true", 
                       dest = "dry_run",
                       help= "don't actually do anything.")
    
    parser.add_option("-v", "--verbose",
                      action="count", 
                      dest="verbosity", 
                      default= 0,
                      help="increase the verbosity level. There is 4 levels: Error messages (default), Warning (-v), Info (-vv) and Debug.(-vvv)") 
    
    options, args = parser.parse_args()
    
    
    
    if not options.jobs and not options.sessions :
        options.jobs = True
        options.sessions =True

    import logging  
    
    if options.log_file is None:
        cleaner_handler = logging.StreamHandler(  sys.stderr  )
    else:
        try:
            cleaner_handler = logging.FileHandler( options.log_file , 'a' )
        except(IOError , OSError) , err:
            print >> sys.stderr , "cannot log messages in %s: %s"%( options.log_file , err )
            sys.exit(1)
            
    if options.verbosity == 0:
        cleaner_handler.setLevel( logging.ERROR )
    elif options.verbosity == 1:
        cleaner_handler.setLevel( logging.WARNING )
    elif options.verbosity == 2:
        cleaner_handler.setLevel( logging.INFO )
    elif options.verbosity == 3:
        cleaner_handler.setLevel( logging.DEBUG )
        
    if options.verbosity < 3:
        cleaner_formatter = logging.Formatter( '%(filename)-10s : %(levelname)-8s : %(asctime)s : %(message)s' , '%a, %d %b %Y %H:%M:%S' )
    else:
        cleaner_formatter =   logging.Formatter( '%(filename)-10s : %(levelname)-8s : L %(lineno)d : %(asctime)s : %(message)s' , '%a, %d %b %Y %H:%M:%S' )        
    cleaner_handler.setFormatter( cleaner_formatter )
    logger = logging.getLogger( 'cleaner' )
    logger.addHandler( cleaner_handler )
    
    if options.jobs :
        clean_jobs( config , now , options.delay , logger, dry_run = options.dry_run )
    if options.sessions :
        clean_sessions( config , now , options.delay , logger, dry_run = options.dry_run )
    
    