# Source code for dexom_python.cluster_utils.write_cluster_scripts

import os
import argparse
import numpy as np
from pathlib import Path
from dexom_python.model_functions import read_model, get_all_reactions_from_model, DEFAULT_VALUES
from warnings import warn


def write_rxn_enum_script(directory, modelfile, weightfile, cplexpath, imatsol=None, reactionlist=None,
                          objtol=DEFAULT_VALUES['obj_tol'], eps=DEFAULT_VALUES['epsilon'],
                          thr=DEFAULT_VALUES['threshold'], tol=DEFAULT_VALUES['tolerance'],
                          timelim=DEFAULT_VALUES['timelimit'], iters=100, maxiters=1e10):
    """
    Writes slurm batch scripts which launch reaction-enumeration in parallel batches.

    Three kinds of files are written into ``directory``: one ``rxn_batch_<i>.sh`` per batch, a launcher
    ``rxn_runfiles.sh`` which submits every batch with sbatch, and ``compile_solutions.sh`` which calls
    ``solution_compilation.py`` on the same directory.
    The generated scripts activate a virtual environment located at ``env/bin/activate``.

    Parameters
    ----------
    directory: str
        directory in which the files will be generated. If it does not exist, it will be created
    modelfile: str
        path to the model, written after ``-m`` in each batch command
    weightfile: str
        path to the reaction weights, written after ``-r`` in each batch command
    cplexpath: str
        path which is appended to PYTHONPATH in the generated scripts
    imatsol: str
        if not None, written after ``-p`` in each batch command
    reactionlist: str
        path to a file with one reaction per line. If given, the number of batches is derived from its number of
        lines (capped by maxiters) and the path is written after ``-l``; otherwise the model is read and
        the number of batches is derived from its number of reactions
    objtol: float
        written after ``--obj_tol``
    eps: float
        written after ``-e``
    thr: float
        written after ``--threshold``
    tol: float
        written after ``--tol``
    timelim: int
        if not None, ``-t <timelim>`` is appended at the end of each batch command
    iters: int
        size of the reaction range handled by each batch
    maxiters: int or float
        upper bound on the number of reaction-list lines used for computing the number of batches
    """
    os.makedirs(directory, exist_ok=True)
    if directory[-1] not in ['/', '\\']:
        directory += '/'
    if reactionlist is not None:
        with open(reactionlist, 'r') as file:
            rxns = file.read().split('\n')
        # maxiters defaults to a float (1e10): coerce to int so that the batch count below stays an integer,
        # otherwise range() raises a TypeError as soon as the cap is the smaller of the two values
        n_max = int(min(len(rxns), maxiters))
        rxn_num = int(n_max // iters) + 1
        rstring = '-l ' + reactionlist
    else:
        rstring = ''
        model = read_model(modelfile)
        rxn_num = int(len(model.reactions) // iters) + 1
    istring = '-p ' + imatsol if imatsol is not None else ''
    t = '-t ' + str(timelim) if timelim is not None else ''
    for i in range(rxn_num):
        with open(directory+'rxn_batch_' + str(i) + '.sh', 'w+') as f:
            f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mail-type=ALL\n#SBATCH --mem=64G\n#SBATCH -c 24\n'
                    '#SBATCH -t 10:00:00\n#SBATCH -J rxn_%i\n#SBATCH -o rxnout_%i.out\n#SBATCH -e rxnerr_%i.out\n'
                    % (i, i, i))
            f.write('cd $SLURM_SUBMIT_DIR\ncd ..\nmodule purge\nmodule load system/Python-3.7.4\nsource env/bin/'
                    'activate\nexport PYTHONPATH=${PYTHONPATH}:"%s"\n' % cplexpath)
            # NOTE(review): the command contains a hard-coded "-t 6000" and, when timelim is not None, a second
            # "-t <timelim>" at the end - presumably the later flag takes precedence in the callee; confirm
            f.write('python dexom_python/enum_functions/rxn_enum_functions.py -o %srxn_enum_%i --range %i_%i -m %s -r '
                    '%s %s %s -t 6000 -e %s --threshold %s --tol %s --obj_tol %s %s\n'
                    % (directory, i, i*iters, i*iters+iters, modelfile, weightfile, rstring, istring, eps, thr, tol,
                       objtol, t))
    with open(directory+'rxn_runfiles.sh', 'w+') as f:
        f.write('#!/bin/bash\n#SBATCH --mail-type=ALL\n#SBATCH -J runfiles\n#SBATCH -o runout.out\n#SBATCH '
                '-e runerr.out\ncd $SLURM_SUBMIT_DIR\nfor i in {0..%i}\ndo\n dos2unix rxn_batch_"$i".sh\n sbatch'
                ' rxn_batch_"$i".sh\ndone' % (rxn_num-1))
    with open(directory+'compile_solutions.sh', 'w+') as f:
        f.write('#!/bin/bash\n#SBATCH --mail-type=ALL\n#SBATCH -J compile\n#SBATCH -o compout.out\n#SBATCH '
                '-e comperr.out\ncd $SLURM_SUBMIT_DIR\ncd ..\nmodule purge\nmodule load system/Python-3.7.4\n'
                'source env/bin/activate\nexport PYTHONPATH=${PYTHONPATH}:"%s"\n'
                'python dexom_python/cluster_utils/solution_compilation.py -p "*solutions.csv" -s %s -o %s'
                % (cplexpath, directory, directory))
def write_batch_script_divenum(directory, modelfile, weightfile, cplexpath, rxnsols, objtol, filenums=100, iters=100,
                               eps=DEFAULT_VALUES['epsilon'], thr=DEFAULT_VALUES['threshold'],
                               tol=DEFAULT_VALUES['tolerance'], timelim=DEFAULT_VALUES['timelimit']):
    """
    Writes slurm batch scripts which launch diversity-enumeration in parallel batches.

    One ``batch_<i>.sh`` file is written per batch, plus a launcher ``runfiles.sh`` which submits all of them
    with sbatch. The generated scripts activate a virtual environment located at ``env/bin/activate``.

    Parameters
    ----------
    directory: str
        directory in which the files will be generated. If it does not exist, it will be created
    modelfile: str
        path to the model, written after ``-m``
    weightfile: str
        path to the reaction weights, written after ``-r``
    cplexpath: str
        path which is appended to PYTHONPATH in the generated scripts
    rxnsols: str
        file name which is appended to ``directory`` and written after ``-p``
    objtol: float
        written after ``--obj_tol``
    filenums: int
        number of batch scripts to write
    iters: int
        written after ``-i``
    eps: float
        written after ``-e``
    thr: float
        written after ``--threshold``
    tol: float
        written after ``--tol``
    timelim: int
        if not None, ``-t <timelim>`` is appended at the end of each command

    Returns
    -------
    bool
        always True
    """
    slurm_header = ('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mail-type=ALL\n#SBATCH --mem=64G\n#SBATCH -c 24\n'
                    '#SBATCH -t 05:00:00\n#SBATCH -J div_{i}\n#SBATCH -o divout{i}.out\n#SBATCH -e diverr{i}.out\n'
                    '')
    env_setup = ('cd $SLURM_SUBMIT_DIR\ncd ..\nmodule purge\nmodule load system/Python-3.7.4\nsource env/bin/'
                 'activate\nexport PYTHONPATH=${PYTHONPATH}:"%s"\n')
    div_command = ('python dexom_python/enum_functions/diversity_enum_functions.py -o {d}div_enum_{i} -m {m} -r {w} -p'
                   ' {d}{r} -a {a} -i {n} --obj_tol {o} -e {e} --threshold {thr} --tol {tol} -s {i} '
                   '{t}')
    launcher = ('#!/bin/bash\n#SBATCH --mail-type=ALL\n#SBATCH -J runfiles\n#SBATCH -o runout.out\n#SBATCH '
                '-e runerr.out\ncd $SLURM_SUBMIT_DIR\nfor i in {0..%i}\ndo\n dos2unix batch_"$i".sh\n sbatch'
                ' batch_"$i".sh\ndone')

    os.makedirs(directory, exist_ok=True)
    if not directory.endswith(('/', '\\')):
        directory += '/'
    time_flag = '' if timelim is None else '-t ' + str(timelim)

    for batch in range(filenums):
        with open(directory + 'batch_' + str(batch) + '.sh', 'w+') as script:
            script.write(slurm_header.format(i=batch))
            script.write(env_setup % cplexpath)
            # value passed as "-a": decreases with the batch index, rounded to 5 decimals
            a_value = np.around((1 - 1 / (filenums * 2 * (iters / 10))) ** batch, 5)
            script.write(div_command.format(d=directory, i=batch, m=modelfile, w=weightfile, r=rxnsols, a=a_value,
                                            n=iters, o=objtol, e=eps, thr=thr, tol=tol, t=time_flag))

    with open(directory + 'runfiles.sh', 'w+') as script:
        script.write(launcher % (filenums - 1))
    return True
def write_batch_script1(directory, modelfile, weightfile, cplexpath, reactionlist=None, imatsol=None,
                        objtol=DEFAULT_VALUES['obj_tol'], timelim=DEFAULT_VALUES['timelimit'], filenums=100,
                        iters=100, rxniters=5):
    """
    Generates the slurm bash scripts for parallelization approach 1 of dexom-python.

    Every batch script first runs reaction-enumeration and then diversity-enumeration.
    A launcher script ``runfiles.sh`` which submits all batches is written as well.
    The scripts expect a virtual environment called env.

    Parameters
    ----------
    directory: str
        output directory for the generated files (created when missing)
    modelfile: str
        path to the model
    weightfile: str
        path to the reaction weights
    cplexpath: str
        path to a cplex installation on the cluster
    reactionlist: str
        list of reactions for reaction-enumeration
    imatsol: str
        path to imat solution
    objtol: float
        objective tolerance
    timelim: int
        solver timelimit
    filenums: int
        number of parallel batches
    iters: int
        number of diversity-enumeration iterations per batch
    rxniters: int
        number of reaction-enumeration iterations per batch

    Returns
    -------
    bool
        always True
    """
    os.makedirs(directory, exist_ok=True)
    if not directory.endswith(('/', '\\')):
        directory += '/'

    # optional command-line fragments: empty when the corresponding argument is None
    list_flag = '' if reactionlist is None else '-l ' + reactionlist
    start_flag = '' if imatsol is None else '-p ' + imatsol
    time_flag = '' if timelim is None else '-t ' + str(timelim)

    for batch in range(filenums):
        first_rxn = batch * rxniters
        script_path = directory + 'batch_' + str(batch) + '.sh'
        with open(script_path, 'w+') as script:
            script.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mail-type=ALL\n#SBATCH --mem=64G\n#SBATCH -c 24\n'
                         '#SBATCH -t 12:00:00\n#SBATCH -J dexom1_%i\n#SBATCH -o dex1out%i.out\n'
                         '#SBATCH -e dex1err%i.out\n' % (batch, batch, batch))
            script.write('cd $SLURM_SUBMIT_DIR\ncd ..\nmodule purge\nmodule load system/Python-3.7.4\nsource env/bin/'
                         'activate\nexport PYTHONPATH=${PYTHONPATH}:"%s"\n' % cplexpath)
            script.write('python dexom_python/enum_functions/rxn_enum_functions.py -o %srxn_enum_%i --range %i_%i '
                         '-m %s -r %s %s %s %s --save\n'
                         % (directory, batch, first_rxn, first_rxn + rxniters, modelfile, weightfile, list_flag,
                            start_flag, time_flag))
            # value passed as "-a": decreases with the batch index, rounded to 5 decimals
            a_value = np.around((1 - 1 / (filenums * 2 * (iters / 10))) ** batch, 5)
            script.write('python dexom_python/enum_functions/diversity_enum_functions.py -o {d}div_enum_{i} -m {m} '
                         '-r {w} -p {d}rxn_enum_{i}_solution_1.csv -a {a} -i {n} --obj_tol {o} {t}'
                         ''.format(d=directory, i=batch, m=modelfile, w=weightfile, a=a_value, n=iters, o=objtol,
                                   t=time_flag))

    with open(directory + 'runfiles.sh', 'w+') as script:
        script.write('#!/bin/bash\n#SBATCH --mail-type=ALL\n#SBATCH -J runfiles\n#SBATCH -o runout.out\n#SBATCH '
                     '-e runerr.out\ncd $SLURM_SUBMIT_DIR\nfor i in {0..%i}\ndo\n dos2unix batch_"$i".sh\n sbatch'
                     ' batch_"$i".sh\ndone' % (filenums - 1))
    return True
def write_batch_script2(directory, modelfile, weightfile, cplexpath, objtol=DEFAULT_VALUES['obj_tol'],
                        timelim=DEFAULT_VALUES['timelimit'], rxnsols=100, filenums=100):
    """
    Writes bash scripts for dexom-python parallelization approach 2 on a slurm cluster.
    In this approach, individual diversity-enumeration iterations are launched in each batch - this requires the
    existence of reaction-enumeration solutions beforehand.
    These scripts assume that you have setup a virtual environment called env.

    The starting solutions are the files matching ``*solution_*.csv`` in ``directory``, most recently created
    first. One ``rxnstart_<i>.sh`` script is written per starting solution. If fewer than ``rxnsols`` files are
    found, a warning is emitted and scripts are written only for the files which exist.
    Additionally, ``dexomstart.sh`` and the launcher ``rundexoms.sh`` are written.

    Parameters
    ----------
    directory: str
        directory in which the files will be generated
    modelfile: str
        path to the model
    weightfile: str
        path to the reaction weights
    cplexpath: str
        path to a cplex installation on the cluster
    objtol: float
        objective tolerance
    timelim: int
        solver timelimit
    rxnsols: int
        number of reaction-enumeration solutions provided
    filenums: int
        number of parallel batches

    Returns
    -------
    bool
        always True
    """
    os.makedirs(directory, exist_ok=True)
    if directory[-1] not in ['/', '\\']:
        directory += '/'
    paths = sorted(list(Path(directory).glob('*solution_*.csv')), key=os.path.getctime)
    paths.reverse()
    if len(paths) < rxnsols:
        warn('Approach 2 requires previous enumeration solutions as starting points. '
             '%i solutions were expected, but only %i were found in the folder.\n' % (rxnsols, len(paths)))
    # only solutions which actually exist can be used as starting points: indexing paths up to rxnsols
    # would raise an IndexError right after the warning above
    n_start = min(rxnsols, len(paths))
    t = '-t ' + str(timelim) if timelim is not None else ''
    for i in range(n_start):
        a = (1 - 1 / (rxnsols * 2 * (rxnsols / 10))) ** i
        with open(directory+'rxnstart_'+str(i)+'.sh', 'w+') as f:
            f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mail-type=ALL\n#SBATCH --mem=64G\n#SBATCH -c 24\n'
                    '#SBATCH -t 00:10:00\n#SBATCH -J dexom2_%i\n#SBATCH -o dex2out%i.out\n#SBATCH -e dex2err%i.out\n'
                    % (i, i, i))
            f.write('cd $SLURM_SUBMIT_DIR\ncd ..\nmodule purge\nmodule load system/Python-3.7.4\nsource env/bin/'
                    'activate\nexport PYTHONPATH=${PYTHONPATH}:"%s"\n' % cplexpath)
            # the scripts run on a linux cluster, so windows path separators are converted
            sol = str(paths[i]).replace('\\', '/')
            f.write('python dexom_python/enum_functions/diversity_enum_functions.py -o %sdiv_enum_%i -m %s -r %s -p '
                    '%s -a %.5f -i 1 --obj_tol %.4f --save %s'
                    % (directory, i, modelfile, weightfile, sol, a, objtol, t))
    a = (1 - 1 / (filenums * 2 * (filenums / 10)))
    with open(directory+'dexomstart.sh', 'w+') as f:
        f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mail-type=ALL\n#SBATCH --mem=64G\n#SBATCH -c 24\n'
                '#SBATCH -t 01:00:00\n')
        f.write('cd $SLURM_SUBMIT_DIR\ncd ..\nmodule purge\nmodule load system/Python-3.7.4\nsource env/bin/'
                'activate\nexport PYTHONPATH=${PYTHONPATH}:"%s"\n' % cplexpath)
        # NOTE(review): "-p" receives the output directory itself and "-s" receives filenums -
        # confirm that this is what diversity_enum_functions.py expects
        f.write('python dexom_python/enum_functions/diversity_enum_functions.py -o %sdiv_enum -m %s -r %s -p '
                '%s -a %.5f -i 1 -s %i --obj_tol %.4f --save %s'
                % (directory, modelfile, weightfile, directory, a, filenums, objtol, t))
    launcher = ('#!/bin/bash\n#SBATCH --mail-type=ALL\n#SBATCH -J rundexoms\n#SBATCH -o runout.out\n#SBATCH '
                '-e runerr.out\ncd $SLURM_SUBMIT_DIR\n')
    if n_start > 0:
        # loop only over the rxnstart scripts which were written; with no scripts at all the loop is
        # left out, because the bash expansion {0..-1} would still iterate over 0 and -1
        launcher += ('for i in {0..%i}\ndo\n dos2unix rxnstart_"$i".sh\n sbatch '
                     'rxnstart_"$i".sh\ndone\n' % (n_start-1))
    launcher += ('dos2unix dexomstart.sh\nfor i in {0..%i}\ndo\n sbatch -J dexomiter_"$i" '
                 '-o dexout_"$i".out -e dexerr_"$i".out dexomstart.sh \ndone' % (filenums-1))
    with open(directory+'rundexoms.sh', 'w+') as f:
        f.write(launcher)
    return True
def main():
    """
    This function is called when you run this script from the commandline.
    It writes batch scripts for launching DEXOM on a slurm cluster.
    Note that default values are used for most parameters.
    This also assumes that you have a virtual environment called env in your project directory.
    Use --help to see commandline parameters

    There are 3 approaches for using parallel batches in DEXOM:

    Approach 1: Within each batch, reaction-enumeration and diversity-enumeration are performed.

    Approach 2: Individual diversity-enumeration iterations are launched in each batch - this requires the
    existence of reaction-enumeration solutions beforehand.

    Approach 3: First, launch parallel reaction-enumeration batches. Then compile the solutions. Then
    diversity-enumeration batches can be launched using the compiled rxn-enum solutions as starting points.

    Returns
    -------
    bool
        always True. If the model or the reaction weights are missing, the argument parser exits with an error.
    """
    # the pieces of the description are joined with explicit spaces between them
    description = 'Writes batch scripts for launching DEXOM on a slurm cluster. Note that default values are used ' \
                  'for most parameters. This also assumes that you have a virtual environment called env in your ' \
                  'project directory'
    parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-o', '--out_path', default='cluster/', help='Folder to which the files are written. '
                        'The folder will be created if not present')
    parser.add_argument('-m', '--model', default=None, help='Metabolic model in sbml, json, or matlab format')
    parser.add_argument('-r', '--reaction_weights', default=None,
                        help='Reaction weights in csv format (first row: reaction names, second row: weights)')
    parser.add_argument('-l', '--reaction_list', default=None, help='list of reactions in the model')
    parser.add_argument('-p', '--prev_sol', default=None, help='starting solution')
    parser.add_argument('-c', '--cplex_path', help='path to the cplex solver',
                        default='/home/mstingl/save/CPLEX_Studio1210/cplex/python/3.7/x86-64_linux')
    parser.add_argument('--obj_tol', type=float, default=DEFAULT_VALUES['obj_tol'],
                        help='objective value tolerance, as a fraction of the original value')
    parser.add_argument('-n', '--filenums', type=int, default=100, help='number of parallel threads')
    parser.add_argument('-i', '--iterations', type=int, default=100, help='number of div-enum iterations per thread')
    parser.add_argument('--rxniters', type=int, default=5, help='number of rxn-enum iterations per thread')
    parser.add_argument('-a', '--approach', type=int, default=3, help='which parallelisation approach to use')
    args = parser.parse_args()

    # the model and weight paths are written verbatim into every generated command: without them the
    # scripts would contain "-m None -r None" (or read_model(None) would fail below), so stop early
    if args.model is None:
        parser.error('a metabolic model must be provided with -m/--model')
    if args.reaction_weights is None:
        parser.error('reaction weights must be provided with -r/--reaction_weights')

    os.makedirs(args.out_path, exist_ok=True)
    # make sure that the folder ends with a separator, because file names are appended to it directly
    out_path = args.out_path
    if out_path[-1] not in ['/', '\\']:
        out_path += '/'

    if args.reaction_list:
        reactionlist = args.reaction_list
    else:
        # no reaction list was provided: save a shuffled list of all model reactions and use that file
        model = read_model(args.model)
        get_all_reactions_from_model(model, save=True, shuffle=True, out_path=out_path)
        reactionlist = out_path + model.id + '_reactions_shuffled.csv'

    if args.approach == 1:
        print('Approach 1: Within each batch, reaction-enumeration and diversity-enumeration are performed.')
        write_batch_script1(out_path, args.model, args.reaction_weights, args.cplex_path, reactionlist,
                            args.prev_sol, args.obj_tol, DEFAULT_VALUES['timelimit'], args.filenums,
                            args.iterations, args.rxniters)
    elif args.approach == 2:
        print('Approach 2: Indiviual diversity-enumeration iterations are launched in each batch - this requires the '
              'existance of reaction-enumeration solutions beforehand.')
        # NOTE(review): --rxniters is passed as the expected number of rxn-enum solutions (rxnsols) - confirm
        write_batch_script2(out_path, args.model, args.reaction_weights, args.cplex_path, args.obj_tol,
                            DEFAULT_VALUES['timelimit'], args.rxniters, args.filenums)
    elif args.approach == 3:
        print('Approach 3: First, launch parallel reaction-enumeration batches. Then compile the solutions. Then '
              'diversity-enumeration batches can be launched using the compiled rxn-enum solutions as starting points.')
        write_rxn_enum_script(out_path, args.model, args.reaction_weights, args.cplex_path, args.prev_sol,
                              reactionlist, args.obj_tol, DEFAULT_VALUES['epsilon'], DEFAULT_VALUES['threshold'],
                              DEFAULT_VALUES['tolerance'], DEFAULT_VALUES['timelimit'], args.rxniters, maxiters=1e10)
        write_batch_script_divenum(out_path, args.model, args.reaction_weights, args.cplex_path,
                                   'combined_solutions.csv', args.obj_tol, args.filenums, args.iterations,
                                   DEFAULT_VALUES['epsilon'], DEFAULT_VALUES['threshold'],
                                   DEFAULT_VALUES['tolerance'], DEFAULT_VALUES['timelimit'])
    else:
        print('approach parameter value must be 1, 2, or 3')
    return True
# Run the command-line entry point only when this file is executed as a script, not when it is imported.
if __name__ == '__main__': main()