changeset 12:fc77995bc4da

planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
author jpayne
date Wed, 08 May 2024 00:32:13 -0400
parents 7fd0ef5842e7
children f550715358f1
files bio2srr.py test-data/accessions.txt test-data/metadata.tsv
diffstat 3 files changed, 33 insertions(+), 188 deletions(-) [+]
line wrap: on
line diff
--- a/bio2srr.py	Mon May 06 01:42:27 2024 -0400
+++ b/bio2srr.py	Wed May 08 00:32:13 2024 -0400
@@ -162,9 +162,9 @@
     return sampledict
 
 
-def yield_sra_runs_from_sample(biosampleids):
+def yield_sra_runs_from_sample(biosample):
     sleep(1 if not api_key else 0.1)
-    response = requests.get(elink, params=dict(id=",".join(biosampleids), dbfrom="biosample", db="sra", format="json", **extra_params))
+    response = requests.get(elink, params=dict(id=biosample, dbfrom="biosample", db="sra", format="json", **extra_params))
     response.raise_for_status()
     reply = response.json()
     for ids in batched(reply.get("linksets", [{}])[0].get("linksetdbs", [{}])[0].get("links", []), 200):
@@ -185,6 +185,8 @@
 def flatten_runs(runxml):
     root = xml.fromstring(f"<data>{runxml}</data>") # gotta fix their garbage embedded XML since it isn't singly-rooted
     for run in root.findall(".//Run"):
+        if run.attrib["is_public"] == "false":
+            logger.warning(f"Skipping non-public run {run.attrib['acc']}")
         yield dict(
             sra_run_accession = run.attrib["acc"],
             total_spots = run.attrib["total_spots"],
@@ -212,10 +214,14 @@
             log(biosample_xml)
             raise
         sampledict["bioproject"] = bioproject
+        noruns = True
         for sra, runs in yield_sra_runs_from_sample(biosample):
             for run in flatten_runs(runs.strip()):
+                noruns = False
                 run.update(sampledict)
                 rows.append(run)
+        if noruns:
+            rows.append(sampledict)
 
     log(f"Writing {len(rows)} rows to metadata.tsv")
 
@@ -225,7 +231,7 @@
             header.add(key)
 
     header = sorted(list(header), key=hso)
-    logger.info(f"Header: {header}")
+    # logger.info(f"Header: {header}")
 
     rows.sort(key=lambda x: x["biosample_accession"])
 
@@ -234,11 +240,24 @@
         writer.writeheader()
         writer.writerows(rows)
 
-    log(f"Writing {len(rows)} accessions to accessions.txt")
+    # check for duplicate runs and unreleased samples
+
+    accessions = [row.get("sra_run_accession") for row in rows if row.get("sra_run_accession")]
+
+    raw_length = len(accessions)
+
+    accessions = sorted(list(set(accessions)))
+
+    if raw_length < len(rows):
+        logger.warning(f"Bioproject {starting_bioproject} contains unreleased samples. {len(rows) - raw_length} samples will not be included in accessions.txt")
+
+    if len(accessions) < raw_length:
+        logger.warning(f"Some SRA runs may have been reached through multiple projects or samples. accessions.txt will be deduplicated but the metadata table is not")
+
+    log(f"Writing {len(accessions)} unique accessions to accessions.txt")
 
     with open("accessions.txt", "w") as f:
-        for row in rows:
-            f.write(row["sra_run_accession"] + "\n")
+        f.writelines(accessions)
 
 
 if __name__ == "__main__":
--- a/test-data/accessions.txt	Mon May 06 01:42:27 2024 -0400
+++ b/test-data/accessions.txt	Wed May 08 00:32:13 2024 -0400
@@ -1,91 +1,1 @@
-SRR288080
-SRR005375
-SRR005372
-SRR000090
-SRR000091
-SRR000092
-SRR000093
-SRR000094
-SRR000095
-SRR000078
-SRR000079
-SRR000072
-SRR000073
-SRR000074
-SRR000075
-SRR000076
-SRR000077
-SRR000070
-SRR000071
-SRR288080
-SRR000090
-SRR000091
-SRR000092
-SRR000093
-SRR000094
-SRR000095
-SRR000078
-SRR000079
-SRR000070
-SRR000071
-SRR288080
-SRR000090
-SRR000091
-SRR000092
-SRR000093
-SRR000094
-SRR000095
-SRR000080
-SRR000081
-SRR000078
-SRR000079
-SRR000070
-SRR000071
-SRR288080
-SRR005375
-SRR005372
-SRR000090
-SRR000091
-SRR000092
-SRR000093
-SRR000094
-SRR000095
-SRR000078
-SRR000079
-SRR000072
-SRR000073
-SRR000074
-SRR000075
-SRR000076
-SRR000077
-SRR000070
-SRR000071
-SRR000068
-SRR000069
-SRR288080
-SRR000080
-SRR000081
-SRR000078
-SRR000079
-SRR000068
-SRR000069
-SRR000066
-SRR000067
-SRR288080
-SRR287817
-SRR000089
-SRR000088
-SRR000087
-SRR000086
-SRR000085
-SRR000084
-SRR000083
-SRR000082
-SRR000080
-SRR000081
-SRR000078
-SRR000079
-SRR000068
-SRR000069
-SRR000066
-SRR000067
+SRR13160357SRR13160358SRR13160359SRR13160360SRR13167188
\ No newline at end of file
--- a/test-data/metadata.tsv	Mon May 06 01:42:27 2024 -0400
+++ b/test-data/metadata.tsv	Wed May 08 00:32:13 2024 -0400
@@ -1,92 +1,8 @@
 bioproject	biosample_accession	organism	taxid	package	Genus	ProjectAccession	PublicAccession	Species	attribute_package	collected_by	collection_date	geo_loc_name	isolate	isolate_name_alias	isolation_source	lat_lon	project_name	sequenced_by	sra_run_accession	strain	total_bases	total_spots
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR288080	ECP19-2498	1008246	3835
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR005375	ECP19-2498	63477063	237172
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR005372	ECP19-2498	21805775	88278
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000090	ECP19-2498	59522375	222843
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000091	ECP19-2498	392964	1467
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000092	ECP19-2498	872292	3261
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000093	ECP19-2498	60878431	227850
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000094	ECP19-2498	1311175	4908
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000095	ECP19-2498	592711	2214
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000078	ECP19-2498	35726106	136244
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000079	ECP19-2498	33865731	128606
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000072	ECP19-2498	43110538	164772
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000073	ECP19-2498	834018	3206
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000074	ECP19-2498	1191933	4540
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000075	ECP19-2498	817514	3107
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000076	ECP19-2498	53028372	201721
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000077	ECP19-2498	322254	1226
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000070	ECP19-2498	69214301	262057
-PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000071	ECP19-2498	56794062	215192
-PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR288080	ECP19-598	1008246	3835
-PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000090	ECP19-598	59522375	222843
-PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000091	ECP19-598	392964	1467
-PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000092	ECP19-598	872292	3261
-PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000093	ECP19-598	60878431	227850
-PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000094	ECP19-598	1311175	4908
-PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000095	ECP19-598	592711	2214
-PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000078	ECP19-598	35726106	136244
-PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000079	ECP19-598	33865731	128606
-PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000070	ECP19-598	69214301	262057
-PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000071	ECP19-598	56794062	215192
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR288080	ECP19-798	1008246	3835
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000090	ECP19-798	59522375	222843
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000091	ECP19-798	392964	1467
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000092	ECP19-798	872292	3261
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000093	ECP19-798	60878431	227850
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000094	ECP19-798	1311175	4908
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000095	ECP19-798	592711	2214
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000080	ECP19-798	42230342	158320
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000081	ECP19-798	48201615	180220
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000078	ECP19-798	35726106	136244
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000079	ECP19-798	33865731	128606
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000070	ECP19-798	69214301	262057
-PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000071	ECP19-798	56794062	215192
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR288080	ECP19-198	1008246	3835
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR005375	ECP19-198	63477063	237172
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR005372	ECP19-198	21805775	88278
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000090	ECP19-198	59522375	222843
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000091	ECP19-198	392964	1467
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000092	ECP19-198	872292	3261
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000093	ECP19-198	60878431	227850
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000094	ECP19-198	1311175	4908
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000095	ECP19-198	592711	2214
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000078	ECP19-198	35726106	136244
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000079	ECP19-198	33865731	128606
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000072	ECP19-198	43110538	164772
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000073	ECP19-198	834018	3206
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000074	ECP19-198	1191933	4540
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000075	ECP19-198	817514	3107
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000076	ECP19-198	53028372	201721
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000077	ECP19-198	322254	1226
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000070	ECP19-198	69214301	262057
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000071	ECP19-198	56794062	215192
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000068	ECP19-198	63395546	247135
-PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000069	ECP19-198	57476129	224837
-PRJNA681235	SAMN17131267	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091031		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR288080	CJP19-D445	1008246	3835
-PRJNA681235	SAMN17131267	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091031		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000080	CJP19-D445	42230342	158320
-PRJNA681235	SAMN17131267	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091031		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000081	CJP19-D445	48201615	180220
-PRJNA681235	SAMN17131267	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091031		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000078	CJP19-D445	35726106	136244
-PRJNA681235	SAMN17131267	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091031		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000079	CJP19-D445	33865731	128606
-PRJNA681235	SAMN17131267	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091031		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000068	CJP19-D445	63395546	247135
-PRJNA681235	SAMN17131267	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091031		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000069	CJP19-D445	57476129	224837
-PRJNA681235	SAMN17131267	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091031		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000066	CJP19-D445	63790620	242673
-PRJNA681235	SAMN17131267	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091031		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000067	CJP19-D445	66936400	255351
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR288080	CJP19-D996	1008246	3835
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR287817	CJP19-D996	155025677	1414888
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000089	CJP19-D996	64994909	250945
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000088	CJP19-D996	62912540	242861
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000087	CJP19-D996	1051130	4049
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000086	CJP19-D996	525756	2047
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000085	CJP19-D996	436118	1684
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000084	CJP19-D996	466139	1803
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000083	CJP19-D996	1251016	4841
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000082	CJP19-D996	1227889	4753
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000080	CJP19-D996	42230342	158320
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000081	CJP19-D996	48201615	180220
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000078	CJP19-D996	35726106	136244
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000079	CJP19-D996	33865731	128606
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000068	CJP19-D996	63395546	247135
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000069	CJP19-D996	57476129	224837
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000066	CJP19-D996	63790620	242673
-PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR000067	CJP19-D996	66936400	255351
+PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR13160357	ECP19-2498	312756765	660858
+PRJNA681235	SAMN16946945	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091029	coli	environmental/food/other	CDC	2019	USA		CFSAN091029	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR13160358	ECP19-2498	327001270	704624
+PRJNA681235	SAMN16946946	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091027	coli	environmental/food/other	CDC	2019	USA		CFSAN091027	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR13160360	ECP19-598	316865532	683880
+PRJNA681235	SAMN16946947	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091028	coli	environmental/food/other	CDC	2019	USA		CFSAN091028	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR13160359	ECP19-798	473318585	1007158
+PRJNA681235	SAMN16956340	Escherichia coli O157:H7	83334	Pathogen.env.1.0	Escherichia	PRJNA681235	CFSAN091030	coli	environmental/food/other	CDC	2019	USA		CFSAN091030	missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition	SRR13167188	ECP19-198	385043067	827691
+PRJNA681235	SAMN17131267	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091031		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition		CJP19-D445		
+PRJNA681235	SAMN17131268	Campylobacter jejuni	197	Pathogen.env.1.0						CDC	missing	missing	CFSAN091032		missing	missing	GenomeTrakr	FDA Center for Food Safety and Applied Nutrition		CJP19-D996