gtsubsampler: subsamplr.py comparison

"planemo upload for repository https://toolrepo.galaxytrakr.org/"

comparison

equal deleted inserted replaced

-:504004e78363
+:a90a883f88f9
 with ExitStack() as stack:
 ins = [stack.enter_context(openn(path, 'r')) for openn, path in zip(file_openers, ins)] # opened input files
 inns = [iter(grouper(inn, 4)) for inn in ins] # stateful 4-ply iterator over lines in the input
 outs = [stack.enter_context(openn(path, 'w')) for openn, path in zip(file_openers, outs)] # opened output files
+for file in ins:
+print(file.name)
 # https://en.m.wikipedia.org/wiki/Reservoir_sampling
 reservoir = []
 # each reservoir entry is a 1- or 2-tuple (one element per input file) of 4-tuples, each holding the 4 lines of one fastq record
 # we fill it until the target coverage is reached (which fixes the reservoir size); filling consumes reads
 # from the open files
-for readpair in zip(*inns):
+reads = 0
+for i, readpair in enumerate(zip(*inns)):
+reads += len(readpair[0][1])
 reservoir.append(readpair)
-if coverage(reservoir, gen_size) > cov:
+if reads / gen_size > cov:
 break
 k = len(reservoir) # this is about how big the reservoir needs to be to get cov coverage
 #W = exp(log(random.random()) / k)

Mercurial > repos > jpayne > gtsubsampler