diff subsamplr.py @ 4:a90a883f88f9

"planemo upload for repository https://toolrepo.galaxytrakr.org/"
author jpayne
date Fri, 19 Feb 2021 15:28:20 -0500
parents b2915e7e9dfa
children 3852b3edc8a4
line wrap: on
line diff
--- a/subsamplr.py	Fri Feb 19 14:57:58 2021 -0500
+++ b/subsamplr.py	Fri Feb 19 15:28:20 2021 -0500
@@ -69,15 +69,20 @@
     inns = [iter(grouper(inn, 4)) for inn in ins] # stateful 4-ply iterator over lines in the input
     outs = [stack.enter_context(openn(path, 'w')) for openn, path in zip(file_openers, outs)] # opened output files
 
+    for file in ins:
+        print(file.name)
+
     # https://en.m.wikipedia.org/wiki/Reservoir_sampling
 
     reservoir = []
     # this is going to be 1 or 2-tuples of 4-tuples representing the 4 lines of the fastq file
     # we determine its current coverage (and thus its reservoir size) to fill it, which consumes reads
     # from the open files
-    for readpair in zip(*inns):
+    reads = 0
+    for i, readpair in enumerate(zip(*inns)):
+        reads += len(readpair[0][1])
         reservoir.append(readpair)
-        if coverage(reservoir, gen_size) > cov:
+        if reads / gen_size > cov:
             break
 
     k = len(reservoir) # this is about how big the reservoir needs to be to get cov coverage