# Source code for dexom_python.cluster_utils.write_cluster_scripts

import os
import argparse
import numpy as np
from pathlib import Path
from dexom_python.model_functions import read_model, get_all_reactions_from_model, DEFAULT_VALUES
from warnings import warn


def write_rxn_enum_script(directory, modelfile, weightfile, cplexpath, imatsol=None, reactionlist=None,
                          objtol=DEFAULT_VALUES['obj_tol'], eps=DEFAULT_VALUES['epsilon'],
                          thr=DEFAULT_VALUES['threshold'], tol=DEFAULT_VALUES['tolerance'],
                          timelim=DEFAULT_VALUES['timelimit'], iters=100, maxiters=1e10):
    """
    Writes bash scripts for running reaction-enumeration in parallel batches on a slurm cluster.
    One script rxn_batch_<i>.sh is written per batch, together with rxn_runfiles.sh (submits every batch with sbatch)
    and compile_solutions.sh (calls solution_compilation.py on the output folder). These scripts assume that you have
    setup a virtual environment called env.

    Parameters
    ----------
    directory: str
        directory in which the files will be generated. If it does not exist, it will be created
    modelfile: str
        path to the model
    weightfile: str
        path to the reaction weights
    cplexpath: str
        path to a cplex installation on the cluster (appended to PYTHONPATH in the scripts)
    imatsol: str
        path to a starting solution, passed to the enumeration script with -p. Omitted if None
    reactionlist: str
        path to a file with one reaction per line, passed to the enumeration script with -l.
        If None, the number of reactions is taken from the model instead
    objtol: float
        objective tolerance
    eps: float
        value passed to the enumeration script with -e
    thr: float
        value passed to the enumeration script with --threshold
    tol: float
        value passed to the enumeration script with --tol
    timelim: int
        solver timelimit. If None, only the hard-coded '-t 6000' is written
    iters: int
        number of reactions per batch
    maxiters: int or float
        upper bound on the number of reactions used for computing the number of batches
    """
    os.makedirs(directory, exist_ok=True)
    if directory[-1] not in ['/', '\\']:
        directory += '/'
    if reactionlist is not None:
        with open(reactionlist, 'r') as file:
            rxns = file.read().split('\n')
        # A file that ends with a newline produces empty trailing entries; they are not reactions and must not be
        # counted. Only trailing entries are dropped, so the positions of all remaining lines stay unchanged.
        while rxns and not rxns[-1].strip():
            rxns.pop()
        n_rxns = len(rxns)
        rstring = '-l ' + reactionlist
    else:
        rstring = ''
        model = read_model(modelfile)
        n_rxns = len(model.reactions)
    # int() keeps the batch count an integer even when a float maxiters (such as the default 1e10) is the minimum;
    # a float count would make range() below raise a TypeError
    n_max = int(min(n_rxns, maxiters))
    # Ceiling division avoids an empty extra batch when n_max is an exact multiple of iters. At least one batch
    # is always written so that the '{0..N}' loop in rxn_runfiles.sh never receives a negative bound.
    rxn_num = max(1, int(-(-n_max // iters)))
    if imatsol is not None:
        istring = '-p ' + imatsol
    else:
        istring = ''
    if timelim is not None:
        t = '-t ' + str(timelim)
    else:
        t = ''
    for i in range(rxn_num):
        with open(directory+'rxn_batch_' + str(i) + '.sh', 'w') as f:
            f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mail-type=ALL\n#SBATCH --mem=64G\n#SBATCH -c 24\n'
                    '#SBATCH -t 10:00:00\n#SBATCH -J rxn_%i\n#SBATCH -o rxnout_%i.out\n#SBATCH -e rxnerr_%i.out\n'
                    % (i, i, i))
            f.write('cd $SLURM_SUBMIT_DIR\ncd ..\nmodule purge\nmodule load system/Python-3.7.4\nsource env/bin/'
                    'activate\nexport PYTHONPATH=${PYTHONPATH}:"%s"\n' % cplexpath)
            # NOTE(review): the command contains a hard-coded '-t 6000' and, when timelim is set, a second '-t' at
            # the end. Presumably the last occurrence wins in the called script's argument parser - confirm.
            f.write('python dexom_python/enum_functions/rxn_enum_functions.py -o %srxn_enum_%i --range %i_%i -m %s -r '
                    '%s %s %s -t 6000 -e %s --threshold %s --tol %s --obj_tol %s %s\n' %
                    (directory, i, i*iters, i*iters+iters, modelfile, weightfile, rstring, istring, eps, thr, tol,
                     objtol, t))
    with open(directory+'rxn_runfiles.sh', 'w') as f:
        f.write('#!/bin/bash\n#SBATCH --mail-type=ALL\n#SBATCH -J runfiles\n#SBATCH -o runout.out\n#SBATCH '
                '-e runerr.out\ncd $SLURM_SUBMIT_DIR\nfor i in {0..%i}\ndo\n    dos2unix rxn_batch_"$i".sh\n    sbatch'
                ' rxn_batch_"$i".sh\ndone' % (rxn_num-1))
    with open(directory+'compile_solutions.sh', 'w') as f:
        f.write('#!/bin/bash\n#SBATCH --mail-type=ALL\n#SBATCH -J compile\n#SBATCH -o compout.out\n#SBATCH '
                '-e comperr.out\ncd $SLURM_SUBMIT_DIR\ncd ..\nmodule purge\nmodule load system/Python-3.7.4\n'
                'source env/bin/activate\nexport PYTHONPATH=${PYTHONPATH}:"%s"\n'
                'python dexom_python/cluster_utils/solution_compilation.py -p "*solutions.csv" -s %s -o %s' %
                (cplexpath, directory, directory))


def write_batch_script_divenum(directory, modelfile, weightfile, cplexpath, rxnsols, objtol, filenums=100, iters=100,
                               eps=DEFAULT_VALUES['epsilon'], thr=DEFAULT_VALUES['threshold'],
                               tol=DEFAULT_VALUES['tolerance'], timelim=DEFAULT_VALUES['timelimit']):
    """
    Writes bash scripts for launching parallel diversity-enumeration batches on a slurm cluster.
    One script batch_<i>.sh is written per batch, plus runfiles.sh which submits all of them with sbatch.
    These scripts assume that you have setup a virtual environment called env.

    Parameters
    ----------
    directory: str
        directory in which the files will be generated. If it does not exist, it will be created
    modelfile: str
        path to the model
    weightfile: str
        path to the reaction weights
    cplexpath: str
        path to a cplex installation on the cluster (appended to PYTHONPATH in the scripts)
    rxnsols: str
        name of the starting-solution file inside directory, passed to the enumeration script with -p
    objtol: float
        objective tolerance
    filenums: int
        number of parallel batches
    iters: int
        number of diversity-enumeration iterations per batch
    eps: float
        value passed to the enumeration script with -e
    thr: float
        value passed to the enumeration script with --threshold
    tol: float
        value passed to the enumeration script with --tol
    timelim: int
        solver timelimit. The -t option is omitted if None

    Returns
    -------
    bool
        always True
    """
    os.makedirs(directory, exist_ok=True)
    last_char = directory[-1]
    if last_char != '/' and last_char != '\\':
        directory += '/'
    time_flag = '' if timelim is None else '-t ' + str(timelim)

    # Templates shared by every batch script; the values are filled in inside the loop
    env_template = '\n'.join([
        'cd $SLURM_SUBMIT_DIR',
        'cd ..',
        'module purge',
        'module load system/Python-3.7.4',
        'source env/bin/activate',
        'export PYTHONPATH=${PYTHONPATH}:"%s"',
        '',
    ])
    command_template = ' '.join([
        'python dexom_python/enum_functions/diversity_enum_functions.py',
        '-o {d}div_enum_{i}',
        '-m {m}',
        '-r {w}',
        '-p {d}{r}',
        '-a {a}',
        '-i {n}',
        '--obj_tol {o}',
        '-e {e}',
        '--threshold {thr}',
        '--tol {tol}',
        '-s {i}',
        '{t}',
    ])

    for batch in range(filenums):
        script_path = directory + 'batch_' + str(batch) + '.sh'
        with open(script_path, 'w+') as script:
            sbatch_header = [
                '#!/bin/bash',
                '#SBATCH -p workq',
                '#SBATCH --mail-type=ALL',
                '#SBATCH --mem=64G',
                '#SBATCH -c 24',
                '#SBATCH -t 05:00:00',
                '#SBATCH -J div_%s' % batch,
                '#SBATCH -o divout%s.out' % batch,
                '#SBATCH -e diverr%s.out' % batch,
            ]
            script.write('\n'.join(sbatch_header) + '\n')
            script.write(env_template % cplexpath)
            # value for the -a option: shrinks geometrically with the batch index, rounded to 5 decimals
            dist_anneal = np.around((1-1/(filenums*2*(iters/10)))**batch, 5)
            script.write(command_template.format(d=directory, i=batch, m=modelfile, w=weightfile, r=rxnsols,
                                                 a=dist_anneal, n=iters, o=objtol, e=eps, thr=thr, tol=tol,
                                                 t=time_flag))

    launcher_lines = [
        '#!/bin/bash',
        '#SBATCH --mail-type=ALL',
        '#SBATCH -J runfiles',
        '#SBATCH -o runout.out',
        '#SBATCH -e runerr.out',
        'cd $SLURM_SUBMIT_DIR',
        'for i in {0..%i}' % (filenums-1),
        'do',
        '    dos2unix batch_"$i".sh',
        '    sbatch batch_"$i".sh',
        'done',
    ]
    with open(directory + 'runfiles.sh', 'w+') as launcher:
        launcher.write('\n'.join(launcher_lines))
    return True


def write_batch_script1(directory, modelfile, weightfile, cplexpath, reactionlist=None, imatsol=None,
                        objtol=DEFAULT_VALUES['obj_tol'], timelim=DEFAULT_VALUES['timelimit'], filenums=100, iters=100,
                        rxniters=5):
    """
    Generates the slurm bash scripts for dexom-python parallelization approach 1. Every batch script first runs
    reaction-enumeration on its own slice of reactions and then runs diversity-enumeration, starting from the
    reaction-enumeration solution of the same batch. A runfiles.sh script which submits all batches is written as well.
    The scripts expect a virtual environment called env.

    Parameters
    ----------
    directory: str
        folder that receives the generated files; it is created when missing
    modelfile: str
        path to the model
    weightfile: str
        path to the reaction weights
    cplexpath: str
        path to a cplex installation on the cluster
    reactionlist: str
        list of reactions for reaction-enumeration (passed with -l, omitted if None)
    imatsol: str
        path to imat solution (passed with -p, omitted if None)
    objtol: float
        objective tolerance
    timelim: int
        solver timelimit (the -t option is omitted if None)
    filenums: int
        number of parallel batches
    iters: int
        number of diversity-enumeration iterations per batch
    rxniters: int
        number of reaction-enumeration iterations per batch

    Returns
    -------
    bool
        always True
    """
    os.makedirs(directory, exist_ok=True)
    last_char = directory[-1]
    if last_char != '/' and last_char != '\\':
        directory += '/'
    rxnlist_flag = '' if reactionlist is None else '-l ' + reactionlist
    imat_flag = '' if imatsol is None else '-p ' + imatsol
    time_flag = '' if timelim is None else '-t ' + str(timelim)

    # Environment setup shared by every batch script; the cplex path is filled in inside the loop
    env_template = '\n'.join([
        'cd $SLURM_SUBMIT_DIR',
        'cd ..',
        'module purge',
        'module load system/Python-3.7.4',
        'source env/bin/activate',
        'export PYTHONPATH=${PYTHONPATH}:"%s"',
        '',
    ])

    for batch in range(filenums):
        with open(directory + 'batch_' + str(batch) + '.sh', 'w+') as script:
            sbatch_header = '\n'.join([
                '#!/bin/bash',
                '#SBATCH -p workq',
                '#SBATCH --mail-type=ALL',
                '#SBATCH --mem=64G',
                '#SBATCH -c 24',
                '#SBATCH -t 12:00:00',
                '#SBATCH -J dexom1_%i',
                '#SBATCH -o dex1out%i.out',
                '#SBATCH -e dex1err%i.out',
                '',
            ])
            script.write(sbatch_header % (batch, batch, batch))
            script.write(env_template % cplexpath)
            # step 1: reaction-enumeration on the reactions [batch*rxniters, batch*rxniters+rxniters)
            script.write('python dexom_python/enum_functions/rxn_enum_functions.py -o %(d)srxn_enum_%(n)i '
                         '--range %(lo)i_%(hi)i -m %(m)s -r %(w)s %(l)s %(p)s %(t)s --save\n'
                         % {'d': directory, 'n': batch, 'lo': batch*rxniters, 'hi': batch*rxniters+rxniters,
                            'm': modelfile, 'w': weightfile, 'l': rxnlist_flag, 'p': imat_flag, 't': time_flag})
            # step 2: diversity-enumeration; the -a value shrinks geometrically with the batch index
            dist_anneal = np.around((1-1/(filenums*2*(iters/10)))**batch, 5)
            div_template = ('python dexom_python/enum_functions/diversity_enum_functions.py -o {d}div_enum_{i} '
                            '-m {m} -r {w} -p {d}rxn_enum_{i}_solution_1.csv -a {a} -i {n} --obj_tol {o} {t}')
            script.write(div_template.format(d=directory, i=batch, m=modelfile, w=weightfile, a=dist_anneal,
                                             n=iters, o=objtol, t=time_flag))

    launcher_lines = [
        '#!/bin/bash',
        '#SBATCH --mail-type=ALL',
        '#SBATCH -J runfiles',
        '#SBATCH -o runout.out',
        '#SBATCH -e runerr.out',
        'cd $SLURM_SUBMIT_DIR',
        'for i in {0..%i}' % (filenums-1),
        'do',
        '    dos2unix batch_"$i".sh',
        '    sbatch batch_"$i".sh',
        'done',
    ]
    with open(directory + 'runfiles.sh', 'w+') as launcher:
        launcher.write('\n'.join(launcher_lines))
    return True


def write_batch_script2(directory, modelfile, weightfile, cplexpath, objtol=DEFAULT_VALUES['obj_tol'],
                        timelim=DEFAULT_VALUES['timelimit'], rxnsols=100, filenums=100):
    """
    Writes bash scripts for dexom-python parallelization approach 2 on a slurm cluster. In this approach, individual
    diversity-enumeration iterations are launched in each batch - this requires the existence of reaction-enumeration
    solutions beforehand. These scripts assume that you have setup a virtual environment called env.

    The files '*solution_*.csv' found in the directory are used as starting points, newest first (ordered by
    os.path.getctime). If fewer than rxnsols files are found, a warning is issued and start scripts are only written
    for the solutions that exist.

    Parameters
    ----------
    directory: str
        directory in which the files will be generated
    modelfile: str
        path to the model
    weightfile: str
        path to the reaction weights
    cplexpath: str
        path to a cplex installation on the cluster
    objtol: float
        objective tolerance
    timelim: int
        solver timelimit
    rxnsols: int
        number of reaction-enumeration solutions provided
    filenums: int
        number of parallel batches

    Returns
    -------
    bool
        always True
    """
    os.makedirs(directory, exist_ok=True)
    if directory[-1] not in ['/', '\\']:
        directory += '/'
    paths = sorted(Path(directory).glob('*solution_*.csv'), key=os.path.getctime)
    paths.reverse()
    if len(paths) < rxnsols:
        warn('Approach 2 requires previous enumeration solutions as starting points. '
             '%i solutions were expected, but only %i were found in the folder.\n' % (rxnsols, len(paths)))
    # Never index past the solutions that actually exist: previously the loop below raised an IndexError right
    # after the warning and left a half-written rxnstart script behind.
    n_start = max(0, min(rxnsols, len(paths)))
    if timelim is not None:
        t = '-t ' + str(timelim)
    else:
        t = ''
    for i in range(n_start):
        # the -a value is still derived from the requested number of solutions, as before
        a = (1 - 1 / (rxnsols * 2 * (rxnsols / 10))) ** i
        with open(directory+'rxnstart_'+str(i)+'.sh', 'w') as f:
            f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mail-type=ALL\n#SBATCH --mem=64G\n#SBATCH -c 24\n'
                    '#SBATCH -t 00:10:00\n#SBATCH -J dexom2_%i\n#SBATCH -o dex2out%i.out\n#SBATCH -e dex2err%i.out\n'
                    % (i, i, i))
            f.write('cd $SLURM_SUBMIT_DIR\ncd ..\nmodule purge\nmodule load system/Python-3.7.4\nsource env/bin/'
                    'activate\nexport PYTHONPATH=${PYTHONPATH}:"%s"\n' % cplexpath)
            # forward slashes so that paths collected on Windows remain usable inside the bash script
            sol = str(paths[i]).replace('\\', '/')
            f.write('python dexom_python/enum_functions/diversity_enum_functions.py -o %sdiv_enum_%i -m %s -r %s -p '
                    '%s -a %.5f -i 1 --obj_tol %.4f --save %s'
                    % (directory, i, modelfile, weightfile, sol, a, objtol, t))
    a = (1 - 1 / (filenums * 2 * (filenums / 10)))
    with open(directory+'dexomstart.sh', 'w') as f:
        f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mail-type=ALL\n#SBATCH --mem=64G\n#SBATCH -c 24\n'
                '#SBATCH -t 01:00:00\n')
        f.write('cd $SLURM_SUBMIT_DIR\ncd ..\nmodule purge\nmodule load system/Python-3.7.4\nsource env/bin/'
                'activate\nexport PYTHONPATH=${PYTHONPATH}:"%s"\n' % cplexpath)
        # here -p receives the whole output directory instead of a single solution file
        f.write('python dexom_python/enum_functions/diversity_enum_functions.py -o %sdiv_enum -m %s -r %s -p '
                '%s -a %.5f -i 1 -s %i --obj_tol %.4f --save %s'
                % (directory, modelfile, weightfile, directory, a, filenums, objtol, t))
    with open(directory+'rundexoms.sh', 'w') as f:
        f.write('#!/bin/bash\n#SBATCH --mail-type=ALL\n#SBATCH -J rundexoms\n#SBATCH -o runout.out\n#SBATCH '
                '-e runerr.out\ncd $SLURM_SUBMIT_DIR\n')
        # Only loop over the start scripts that were written; with none, '{0..-1}' would expand to "0 -1" in bash
        if n_start > 0:
            f.write('for i in {0..%i}\ndo\n    dos2unix rxnstart_"$i".sh\n    sbatch '
                    'rxnstart_"$i".sh\ndone\n' % (n_start-1))
        f.write('dos2unix dexomstart.sh\nfor i in {0..%i}\ndo\n    sbatch -J dexomiter_"$i" '
                '-o dexout_"$i".out -e dexerr_"$i".out dexomstart.sh \ndone' % (filenums-1))
    return True


def main():
    """
    This function is called when you run this script from the commandline.
    It writes batch scripts for launching DEXOM on a slurm cluster.
    Note that default values are used for most parameters.
    This also assumes that you have a virtual environment called env in your project directory
    Use --help to see commandline parameters

    There are 3 approaches for using parallel batches in DEXOM:
    Approach 1: Within each batch, reaction-enumeration and diversity-enumeration are performed.
    Approach 2: Individual diversity-enumeration iterations are launched in each batch - this requires the existence
    of reaction-enumeration solutions beforehand.
    Approach 3: First, launch parallel reaction-enumeration batches. Then compile the solutions.
    Then diversity-enumeration batches can be launched using the compiled rxn-enum solutions as starting points.

    The parser exits with an error message if --model or --reaction_weights is missing, because both paths are
    written into every generated command.

    Returns
    -------
    bool
        always True
    """
    # the trailing spaces matter: adjacent string literals are joined without any separator
    description = 'Writes batch scripts for launching DEXOM on a slurm cluster. Note that default values are used ' \
                  'for most parameters. This also assumes that you have a virtual environment called env in your ' \
                  'project directory'

    parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-o', '--out_path', default='cluster/', help='Folder to which the files are written. '
                                                                     'The folder will be created if not present')
    parser.add_argument('-m', '--model', default=None, help='Metabolic model in sbml, json, or matlab format')
    parser.add_argument('-r', '--reaction_weights', default=None,
                        help='Reaction weights in csv format (first row: reaction names, second row: weights)')
    parser.add_argument('-l', '--reaction_list', default=None, help='list of reactions in the model')
    parser.add_argument('-p', '--prev_sol', default=None, help='starting solution')
    parser.add_argument('-c', '--cplex_path', help='path to the cplex solver',
                        default='/home/mstingl/save/CPLEX_Studio1210/cplex/python/3.7/x86-64_linux')
    parser.add_argument('--obj_tol', type=float, default=DEFAULT_VALUES['obj_tol'],
                        help='objective value tolerance, as a fraction of the original value')
    parser.add_argument('-n', '--filenums', type=int, default=100, help='number of parallel threads')
    parser.add_argument('-i', '--iterations', type=int, default=100, help='number of div-enum iterations per thread')
    parser.add_argument('--rxniters', type=int, default=5, help='number of rxn-enum iterations per thread')
    parser.add_argument('-a', '--approach', type=int, default=3, help='which parallelisation approach to use')
    args = parser.parse_args()

    # Without these two paths the scripts would contain the literal text 'None' and fail once run on the cluster
    if args.model is None or args.reaction_weights is None:
        parser.error('both --model and --reaction_weights are required to write the batch scripts')

    os.makedirs(args.out_path, exist_ok=True)
    # Normalise the folder once, so that the reaction list path below is built inside the folder even when
    # the user passes e.g. "-o cluster" without a trailing separator
    out_path = args.out_path
    if out_path[-1] not in ['/', '\\']:
        out_path += '/'

    if args.reaction_list:
        reactionlist = args.reaction_list
    else:
        model = read_model(args.model)
        get_all_reactions_from_model(model, save=True, shuffle=True, out_path=out_path)
        reactionlist = out_path + model.id + '_reactions_shuffled.csv'

    if args.approach == 1:
        print('Approach 1: Within each batch, reaction-enumeration and diversity-enumeration are performed.')
        write_batch_script1(out_path, args.model, args.reaction_weights, args.cplex_path, reactionlist,
                            args.prev_sol, args.obj_tol, DEFAULT_VALUES['timelimit'], args.filenums, args.iterations,
                            args.rxniters)
    elif args.approach == 2:
        print('Approach 2: Indiviual diversity-enumeration iterations are launched in each batch - this requires the '
              'existance of reaction-enumeration solutions beforehand.')
        # note: --rxniters is used here as the number of available reaction-enumeration solutions
        write_batch_script2(out_path, args.model, args.reaction_weights, args.cplex_path, args.obj_tol,
                            DEFAULT_VALUES['timelimit'], args.rxniters, args.filenums)
    elif args.approach == 3:
        print('Approach 3: First, launch parallel reaction-enumeration batches. Then compile the solutions. Then '
              'diversity-enumeration batches can be launched using the compiled rxn-enum solutions as starting points.')
        write_rxn_enum_script(out_path, args.model, args.reaction_weights, args.cplex_path, args.prev_sol,
                              reactionlist, args.obj_tol, DEFAULT_VALUES['epsilon'], DEFAULT_VALUES['threshold'],
                              DEFAULT_VALUES['tolerance'], DEFAULT_VALUES['timelimit'], args.rxniters, maxiters=1e10)
        write_batch_script_divenum(out_path, args.model, args.reaction_weights, args.cplex_path,
                                   'combined_solutions.csv', args.obj_tol, args.filenums, args.iterations,
                                   DEFAULT_VALUES['epsilon'], DEFAULT_VALUES['threshold'], DEFAULT_VALUES['tolerance'],
                                   DEFAULT_VALUES['timelimit'])
    else:
        print('approach parameter value must be 1, 2, or 3')
    return True


# Run the command-line interface only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()