Skip to content

Commit 6321764

Browse files
committed
Merge branch 'master' of github.com:sate-dev/sate-core
2 parents 4c2733f + 8a5b199 commit 6321764

File tree

5 files changed

+68
-21
lines changed

5 files changed

+68
-21
lines changed

sate/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
If configuration files are read in the order they occur as arguments (with values in later files replacing previously read values). Options specified in the command line are read last. Thus these values "overwrite" any settings from the configuration files.
4141
"""
4242

43+
TEMP_SEQ_ALIGNMENT_TAG = "seq_alignment.txt"
44+
TEMP_TREE_TAG = "tree.tre"
4345
__all__ = []
4446

4547
import os

sate/filemgr.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ def get_input_source_directory(self):
390390
# single locus dataset: return directory name
391391
return os.path.dirname(os.path.abspath(options.input))
392392

393-
def get_abs_path_for_iter_output(self, iter_num, out_tag):
393+
def get_abs_path_for_iter_output(self, iter_num, out_tag, allow_existing=False):
394394
"""
395395
Returns an absolute path or None for the file for an iteration temporary
396396
file for iteration `iter_num` with the specified `out_tag`
@@ -403,15 +403,16 @@ def get_abs_path_for_iter_output(self, iter_num, out_tag):
403403
"""
404404
p = "iteration_" + str(iter_num) + '_' + out_tag
405405
o_path = self.output_prefix + "_temp_" + p
406-
if os.path.exists(o_path):
407-
n = 1
408-
while os.path.exists(o_path):
409-
t_tag = "_temp%d_" % n
410-
if n > 100:
411-
_LOG.warn('File %s exists iteration-specific output skipped!' % o_path)
412-
return None # don't create a huge # of numbered files
413-
o_path = self.output_prefix + t_tag + p
414-
n += 1
406+
if not allow_existing:
407+
if os.path.exists(o_path):
408+
n = 1
409+
while os.path.exists(o_path):
410+
t_tag = "_temp%d_" % n
411+
if n > 100:
412+
_LOG.warn('File %s exists iteration-specific output skipped!' % o_path)
413+
return None # don't create a huge # of numbered files
414+
o_path = self.output_prefix + t_tag + p
415+
n += 1
415416
return os.path.abspath(o_path)
416417

417418
def get_abs_path_for_tag(self, out_tag):

sate/mainsate.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
from sate.utility import IndentedHelpFormatterWithNL
4141
from sate.filemgr import open_with_intermediates
4242
from sate import filemgr
43+
from sate import TEMP_SEQ_ALIGNMENT_TAG, TEMP_TREE_TAG
44+
4345

4446
_RunningJobs = None
4547

@@ -171,6 +173,9 @@ def finish_sate_execution(sate_team,
171173
# We must read the incoming tree in before we call the get_sequences_for_sate
172174
# function that relabels that taxa in the dataset
173175
######
176+
alignment_as_tmp_filename_to_report = None
177+
tree_as_tmp_filename_to_report = None
178+
174179
tree_file = options.treefile
175180
if tree_file:
176181
if not os.path.exists(tree_file):
@@ -183,6 +188,7 @@ def finish_sate_execution(sate_team,
183188
MESSENGER.send_warning('%d starting trees found in "%s". The first tree will be used.' % (len(tree_list), tree_file))
184189
starting_tree = tree_list[0]
185190
score = None
191+
tree_as_tmp_filename_to_report = tree_file
186192

187193
############################################################################
188194
# This will relabel the taxa if they have problematic names
@@ -271,6 +277,8 @@ def finish_sate_execution(sate_team,
271277
jobq.put(job)
272278
score, starting_tree_str = job.get_results()
273279
_RunningJobs = None
280+
alignment_as_tmp_filename_to_report = sate_products.get_abs_path_for_iter_output("initialsearch", TEMP_SEQ_ALIGNMENT_TAG, allow_existing=True)
281+
tree_as_tmp_filename_to_report = sate_products.get_abs_path_for_iter_output("initialsearch", TEMP_TREE_TAG, allow_existing=True)
274282
if delete_tree_temps:
275283
sate_team.temp_fs.remove_dir(init_tree_dir)
276284
_LOG.debug('We have the tree and whole_alignment, partitions...')
@@ -289,6 +297,8 @@ def finish_sate_execution(sate_team,
289297
score=score,
290298
**sate_config_dict)
291299
job.tree_str = starting_tree_str
300+
job.curr_iter_align_tmp_filename = alignment_as_tmp_filename_to_report
301+
job.curr_iter_tree_tmp_filename = tree_as_tmp_filename_to_report
292302
if score is not None:
293303
job.store_optimum_results(new_multilocus_dataset=multilocus_dataset,
294304
new_tree_str=starting_tree_str,
@@ -302,6 +312,11 @@ def finish_sate_execution(sate_team,
302312
MESSENGER.send_info("Starting SATe algorithm on initial tree...")
303313
job.run(tmp_dir_par=temporaries_dir, sate_products=sate_products)
304314
_RunningJobs = None
315+
316+
if job.return_final_tree_and_alignment:
317+
alignment_as_tmp_filename_to_report = job.curr_iter_align_tmp_filename
318+
else:
319+
alignment_as_tmp_filename_to_report = job.best_alignment_tmp_filename
305320

306321
if user_config.commandline.raxml_search_after:
307322
raxml_model = user_config.raxml.model.strip()
@@ -327,26 +342,31 @@ def finish_sate_execution(sate_team,
327342
jobq.put(post_job)
328343
post_score, post_tree = post_job.get_results()
329344
_RunningJobs = None
345+
tree_as_tmp_filename_to_report = sate_products.get_abs_path_for_iter_output("postraxtree", TEMP_TREE_TAG, allow_existing=True)
330346
if delete_tree_temps:
331347
sate_team.temp_fs.remove_dir(post_tree_dir)
332348
job.tree_str = post_tree
333349
job.score = post_score
334350
if post_score > job.best_score:
335351
job.best_tree_str = post_tree
336352
job.best_score = post_score
353+
else:
354+
if job.return_final_tree_and_alignment:
355+
tree_as_tmp_filename_to_report = job.curr_iter_tree_tmp_filename
356+
else:
357+
tree_as_tmp_filename_to_report = job.best_tree_tmp_filename
337358

338-
339359

340360
job.multilocus_dataset.restore_taxon_names()
341361
assert len(sate_products.alignment_streams) == len(job.multilocus_dataset)
342362
for i, alignment in enumerate(job.multilocus_dataset):
343363
alignment_stream = sate_products.alignment_streams[i]
344-
MESSENGER.send_info("Writing final alignment to %s" % alignment_stream.name)
364+
MESSENGER.send_info("Writing resulting alignment to %s" % alignment_stream.name)
345365
alignment.write(alignment_stream, file_format="FASTA")
346366
alignment_stream.close()
347367

348368

349-
MESSENGER.send_info("Writing final tree to %s" % sate_products.tree_stream.name)
369+
MESSENGER.send_info("Writing resulting tree to %s" % sate_products.tree_stream.name)
350370
tree_str = job.tree.compose_newick()
351371
sate_products.tree_stream.write("%s;\n" % tree_str)
352372

@@ -357,13 +377,19 @@ def finish_sate_execution(sate_team,
357377
# outtree_fn = os.path.join(seqdir, "combined_%s.tre" % options.job)
358378
# else:
359379
# outtree_fn = aln_filename + ".tre"
360-
#MESSENGER.send_info("Writing final tree to %s" % outtree_fn)
380+
#MESSENGER.send_info("Writing resulting tree to %s" % outtree_fn)
361381
#tree_str = job.tree.compose_newick()
362382
#sate_products.tree_stream.write("%s;\n" % tree_str)
363383

364384

365-
MESSENGER.send_info("Writing final likelihood score to %s" % sate_products.score_stream.name)
385+
MESSENGER.send_info("Writing resulting likelihood score to %s" % sate_products.score_stream.name)
366386
sate_products.score_stream.write("%s\n" % job.score)
387+
388+
if alignment_as_tmp_filename_to_report is not None:
389+
MESSENGER.send_info('The resulting alignment (with the names in a "safe" form) was first written as the file "%s"' % alignment_as_tmp_filename_to_report)
390+
if tree_as_tmp_filename_to_report is not None:
391+
MESSENGER.send_info('The resulting tree (with the names in a "safe" form) was first written as the file "%s"' % tree_as_tmp_filename_to_report)
392+
367393
finally:
368394
for el in prev_signals:
369395
sig, prev_handler = el

sate/satejob.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
from sate.utility import record_timestamp
3838
from sate.scheduler import jobq
3939
from sate.filemgr import TempFS
40+
from sate import TEMP_SEQ_ALIGNMENT_TAG, TEMP_TREE_TAG
41+
4042

4143
class SateTeam (object):
4244
'''A blob for holding the appropriate merger, alignment, and tree_estimator tools
@@ -149,9 +151,6 @@ def _reset_current_run_settings(self):
149151
self.start_time = None
150152
self.current_iteration = 0
151153
self.last_improvement_time = None
152-
#self.best_multilocus_dataset = None
153-
#self.best_tree_str = self.get_tree_str()
154-
#self.best_score = self.score
155154
self.num_iter_since_imp = 0
156155
self.is_stuck_in_blind = False
157156
self.switch_to_blind_iter = None
@@ -160,6 +159,11 @@ def _reset_current_run_settings(self):
160159
self._blindmode_trigger = None
161160
self._sate_alignment_job = None
162161
self._tree_build_job = None
162+
self.curr_iter_align_tmp_filename = None
163+
self.curr_iter_tree_tmp_filename = None
164+
self.best_tree_tmp_filename = None
165+
self.best_alignment_tmp_filename = None
166+
163167

164168
def _reset_jobs(self):
165169
self.tree_build_job = None
@@ -299,6 +303,8 @@ def store_optimum_results(self, new_multilocus_dataset, new_tree_str, new_score,
299303
self.best_score = new_score
300304
self.last_improvement_time = curr_timestamp
301305
self.num_iter_since_imp = 0
306+
self.best_tree_tmp_filename = self.curr_iter_tree_tmp_filename
307+
self.best_alignment_tmp_filename = self.curr_iter_align_tmp_filename
302308

303309
def run(self, tmp_dir_par, sate_products=None):
304310
assert(os.path.exists(tmp_dir_par))
@@ -377,6 +383,11 @@ def run(self, tmp_dir_par, sate_products=None):
377383
delete_temps=delete_iteration_temps,
378384
sate_products=sate_products,
379385
step_num=self.current_iteration)
386+
prev_curr_align = self.curr_iter_align_tmp_filename
387+
prev_curr_tree = self.curr_iter_tree_tmp_filename
388+
self.curr_iter_align_tmp_filename = sate_products.get_abs_path_for_iter_output(self.current_iteration, TEMP_SEQ_ALIGNMENT_TAG, allow_existing=True)
389+
self.curr_iter_tree_tmp_filename = sate_products.get_abs_path_for_iter_output(self.current_iteration, TEMP_TREE_TAG, allow_existing=True)
390+
380391
self.tree_build_job = tbj
381392
jobq.put(tbj)
382393
new_score, new_tree_str = tbj.get_results()
@@ -414,12 +425,18 @@ def run(self, tmp_dir_par, sate_products=None):
414425
self.score = new_score
415426
self.multilocus_dataset = new_multilocus_dataset
416427
self.tree_str = new_tree_str
417-
self.status('realignment accepted.')
428+
if this_iter_score_improved:
429+
self.status('realignment accepted and score improved.')
430+
else:
431+
self.status('realignment accepted and despite the score not improving.')
418432
# we do not want to continue to try different breaking strategies for this iteration so we break
419433
self.status('current score: %s, best score: %s' % (self.score, self.best_score) )
420434
break
421435
else:
422436
self.status('realignment NOT accepted.')
437+
self.curr_iter_align_tmp_filename = prev_curr_align
438+
self.curr_iter_tree_tmp_filename = prev_curr_tree
439+
423440
break_strategy_index += 1
424441

425442
# self.status('current score: %s, best score: %s' % (self.score, self.best_score) )

sate/tools.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
from alignment import Alignment
3131
from sate import get_logger, GLOBAL_DEBUG, SATE_SYSTEM_PATHS_CFGFILE, DEFAULT_MAX_MB
32+
from sate import TEMP_SEQ_ALIGNMENT_TAG, TEMP_TREE_TAG
3233
from sate.filemgr import open_with_intermediates
3334
from sate.scheduler import jobq, start_worker, DispatchableJob, FakeJob
3435

@@ -78,7 +79,7 @@ def read_internal_alignment(fn,
7879

7980
def copy_temp_tree(src_treef, sate_products, step_num):
8081
if (sate_products is not None) and (step_num is not None):
81-
dest_treef = sate_products.get_abs_path_for_iter_output(step_num, 'tree.tre')
82+
dest_treef = sate_products.get_abs_path_for_iter_output(step_num, TEMP_TREE_TAG)
8283
if dest_treef and os.path.exists(src_treef):
8384
if os.path.exists(dest_treef):
8485
_LOG.warn('File "%s" exists. It will not be overwritten' % dest_treef)
@@ -539,7 +540,7 @@ def store_input(self, seqfn, **kwargs):
539540
if sate_products:
540541
step_num = kwargs.get('step_num')
541542
if step_num is not None:
542-
i_concat_align = sate_products.get_abs_path_for_iter_output(step_num, 'seq_alignment.txt')
543+
i_concat_align = sate_products.get_abs_path_for_iter_output(step_num, TEMP_SEQ_ALIGNMENT_TAG)
543544
if i_concat_align and os.path.exists(seqfn):
544545
if os.path.exists(i_concat_align):
545546
_LOG.warn('File "%s" exists. It will not be overwritten' % i_concat_align)

0 commit comments

Comments
 (0)