# HG changeset patch
# User jpayne
# Date 1714713463 14400
# Node ID 2d4a2159c74b9c5d3649e92790bc0cc836a7bb5f
# Parent 80f1001797c7b2e9c85dda1071e51f720b35c3e8
planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
diff -r 80f1001797c7 -r 2d4a2159c74b __pycache__/bio2srr.cpython-312.pyc
Binary file __pycache__/bio2srr.cpython-312.pyc has changed
diff -r 80f1001797c7 -r 2d4a2159c74b __pycache__/tests.cpython-312-pytest-8.2.0.pyc
Binary file __pycache__/tests.cpython-312-pytest-8.2.0.pyc has changed
diff -r 80f1001797c7 -r 2d4a2159c74b bio2srr.py
--- a/bio2srr.py Wed Oct 27 05:00:45 2021 -0400
+++ b/bio2srr.py Fri May 03 01:17:43 2024 -0400
@@ -1,125 +1,233 @@
-#! /usr/bin/env python3
-
-"Grab SRR numbers from BioProjects via the EMBL-ENA REST API's."
-
-import requests
-import sys
-from xml.etree import ElementTree as xml
-import csv
-
-from time import sleep
-
-sra_exp_query = "https://www.ebi.ac.uk/ebisearch/ws/rest/sra-experiment?query={bioproject}"
-
-sample = """{
- "hitCount": 2,
- "entries": [
- {
- "id": "SRX377510",
- "source": "sra-experiment"
- },
- {
- "id": "SRX583279",
- "source": "sra-experiment"
- }
- ],
- "facets": []
-}"""
-
-data_query = "?display=xml"
-
-sra_run_query = "https://www.ebi.ac.uk/ebisearch/ws/rest/sra-run"
-
-sample = """{
- "hitCount": 1,
- "entries": [
- {
- "id": "SRR1029665",
- "source": "sra-run"
- }
- ],
- "facets": []
-}"""
-
-def get_tag(root, tag):
- val = root.find(tag)
- if val:
- return val.text
-
-if __name__ == "__main__":
- try:
- bioproject = sys.argv[1]
-
- b_result = None
-
- runs = []
-
- while not b_result or len(runs) < b_result['hitCount']:
- b_result = requests.get(sra_run_query, params=dict(query=bioproject, start=len(runs)), headers=dict(Accept="application/json"))
- b_result.raise_for_status()
- b_result = b_result.json()
- runs += [d['id'] for d in b_result['entries']]
-
- if not runs:
- print(f"No results found for '{bioproject}'.", file=sys.stderr)
- quit(1)
- except IndexError:
- raise ValueError("Please provide an NCBI BioProject, NCBI BioSample, EMBL Project, or EMBL Study accession.")
-
- try:
- with open(sys.argv[2], 'r') as f:
- rdr = csv.DictReader(f, dialect='excel', delimiter='\t')
- rcds = list(rdr)
-
-
- except IndexError:
- rcds = []
-
- bsams = []
-
- for id in runs:
- res = requests.get(
- f"https://www.ebi.ac.uk/ebisearch/ws/rest/sra-run/entry/{id}/xref/sra-sample",
- headers=dict(Accept="application/json")
- )
- res.raise_for_status()
- bsams.append(res.json()['entries'][0]['references'][0]['acc'])
- sleep(.1)
-
- # res = requests.get(xref_query.format(runs=",".join(runs)), headers=dict(Accept="application/json"))
- # res.raise_for_status()
- # bsams = [(e['id'], e['references'][0]['acc']) for e in res.json()['entries']]
-
- for run_id, sam_id in zip(runs, bsams):
- print(run_id)
- record = {}
- record['sample'] = run_id
- record['biosample_accession'] = sam_id
- res = requests.get(
- f"https://www.ebi.ac.uk/ena/browser/api/xml/{sam_id}"
- )
- res.raise_for_status()
- root = xml.fromstring(res.text)
-
- record['submitter_id'] = get_tag(root, './/SUBMITTER_ID')
- record['scientific_name'] = get_tag(root, './/SCIENTIFIC_NAME')
-
- for attr in root.findall('.//SAMPLE_ATTRIBUTE'):
- key, value = iter(attr)
- record[key.text] = value.text
- rcds.append(record)
- sleep(.1)
-
- headers = {}
- for record in rcds:
- for key in record.keys():
- headers[key] = None # use a dict to preserve header order
-
-
- with open('./metadata.tsv', 'w') as f:
- wtr = csv.DictWriter(f, dialect='excel', delimiter='\t', fieldnames=headers.keys())
- wtr.writeheader()
- wtr.writerows(rcds)
-
-
-
+"Grab SRR numbers from Bioprojects and sub-bioprojects via Eutils"
+
+import requests
+import sys
+import csv
+
+
+from itertools import batched
+from functools import cmp_to_key
+from time import sleep
+from xml.etree import ElementTree as xml
+
+esearch = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"  # queried by main() to turn the starting accession into a UID
+esummary = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"  # queried for bioproject, biosample and sra summaries
+elink = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi"  # queried for bioproject->bioproject, bioproject->biosample and biosample->sra links
+
+
+import logging
+logging.basicConfig(level=logging.INFO)  # runs at import time: root logger is set to INFO
+
+logger = logging.getLogger("bio2srr")  # module-wide logger used by log() and the error paths
+
+def log(msg):  # INFO-level shortcut onto the module logger
+    logger.log(logging.INFO, msg) # fix logging later
+
+def get_tag(root, tag):
+    "Return the text of the first element under root matching tag; log the miss and return None when absent"
+    found = root.find(tag)
+    if found is None:
+        return log(f"No result for {tag}")  # log() returns None, so the caller still receives None
+    return found.text
+
+
+def header_sort_override(a, b):
+    "cmp-style comparator for column names: priority columns first (in listed order), the rest alphabetically"
+    # NOTE(review): rows are keyed "sra_run_accession", so "srr_accession" never matches - confirm the intended name
+    priority = ["bioproject", "srr_accession", "biosample_accession", "organism", "taxid", "package",]
+    if a == b:
+        return 0
+    for name in priority:
+        if a == name:
+            return -1
+        if b == name:
+            return 1
+    # neither operand is a priority column: fall back to plain string ordering
+    if a < b:
+        return -1
+    return 1
+
+hso = cmp_to_key(header_sort_override)
+
+def resolve_bioproject_ids_and_links(bioproject_id_list):
+    "Recursively follow bioproject links; yield (bioproject accession, biosample UID, biosample XML) per linked biosample"
+    seen = {uid for _, uid in bioproject_id_list}  # UIDs already queued (the list itself holds (accession, UID) tuples)
+    for i, (bioproject, bioproject_id) in enumerate(bioproject_id_list):  # the list doubles as the work queue and grows below
+        log(f"Processing {bioproject} ({bioproject_id}) {i+1}/{len(bioproject_id_list)}")
+        #get bioproject to bioproject links
+        response = requests.get(elink, params=dict(db="bioproject", dbfrom="bioproject", id=bioproject_id, format="json"))
+        response.raise_for_status()
+        reply = response.json()
+        linksets = reply.get("linksets", [{}])[0].get("linksetdbs", [0,0,{}])
+        if len(linksets) >= 3:
+            for uid in linksets[2].get("links", []): #third index is the up to down links
+                if uid not in seen:  # compare UIDs, not tuples: prevents duplicate projects and endless loops on cyclic links
+                    seen.add(uid)
+                    response = requests.get(esummary, params=dict(id=uid, db="bioproject", format="json"))
+                    response.raise_for_status()
+                    bioproject_id_list.append((response.json()["result"][uid]["project_acc"], uid)) # recurse over bioproject links
+        # get bioproject to biosample links
+        response = requests.get(elink, params=dict(db="biosample", dbfrom="bioproject", id=bioproject_id, format="json"))
+        response.raise_for_status()
+        reply = response.json()
+        links = reply.get("linksets", [{}])[0].get("linksetdbs", [{}])[0].get("links", [])
+        log(f"Found {len(links)} biosample links for {bioproject} ({bioproject_id})")
+        for ids in batched(links, 200):
+            response = requests.get(esummary, params=dict(id=",".join(ids), db="biosample", format="json"))
+            response.raise_for_status()
+            replyy = response.json()
+            for field, value in replyy.get("result", {}).items():
+                if "uids" not in field:  # the "uids" entry is the index of the reply, not a sample record
+                    yield bioproject, field, value["sampledata"] # this is XML, deliriously
+            sleep(0.1)
+
+
+biosample_example = """
+
+
+ SAMN17131268
+ CJP19-D996
+
+
+ Pathogen: environmental/food/other sample from Campylobacter jejuni
+
+ Campylobacter jejuni
+
+
+
+ FDA Center for Food Safety and Applied Nutrition
+
+
+ Pathogen.env
+
+ Pathogen.env.1.0
+
+ CJP19-D996
+ missing
+ missing
+ CDC
+ missing
+ missing
+ CFSAN091032
+ GenomeTrakr
+ FDA Center for Food Safety and Applied Nutrition
+
+
+ 681235
+
+
+
+
+"""
+
+def flatten_biosample_xml(biosampxml):
+    "Parse one BioSample XML string into a flat dict: accession, organism, taxid, package, plus one key per Attribute"
+    # Raises KeyError when an Attribute element carries neither harmonized_name nor attribute_name.
+    root = xml.fromstring(biosampxml)
+    organism_node = root.find(r".//Organism")  # may be absent: taxid then becomes None instead of an AttributeError
+    sampledict = dict(
+        biosample_accession=get_tag(root, r'.//Id[@db="BioSample"]'),
+        organism=get_tag(root, r".//OrganismName"),
+        taxid=organism_node.attrib.get("taxonomy_id") if organism_node is not None else None,
+        package=get_tag(root, r".//Package"),
+    )
+    for attribute in root.findall("Attributes/Attribute"):
+        attrs = attribute.attrib
+        # prefer harmonized_name; attribute_name is only looked up (and required) when it is missing
+        key = attrs["harmonized_name"] if "harmonized_name" in attrs else attrs["attribute_name"]
+        sampledict[key] = attribute.text
+
+    return sampledict
+
+
+def yield_sra_runs_from_sample(biosampleids):
+    "Yield (SRA UID, embedded runs XML or None) for SRA records linked to one biosample UID or an iterable of UIDs"
+    uids = [biosampleids] if isinstance(biosampleids, str) else list(biosampleids)  # a bare str would be joined per character ("123" -> "1,2,3")
+    sleep(0.1)
+    response = requests.get(elink, params=dict(id=",".join(uids), dbfrom="biosample", db="sra", format="json"))
+    response.raise_for_status()
+    for ids in batched(response.json().get("linksets", [{}])[0].get("linksetdbs", [{}])[0].get("links", []), 200):
+        sleep(0.1)
+        summary = requests.get(esummary, params=dict(id=','.join(ids), db="sra", format="json"))
+        summary.raise_for_status()
+        for field, value in summary.json().get("result", {}).items():
+            if "uids" not in field:  # skip the index entry of the reply
+                yield field, value.get("runs")
+
+
+runs_example = """
+
+
+"""
+
+def flatten_runs(runxml):
+    "Yield one dict (sra_run_accession, total_spots, total_bases) per Run element found in the runs fragment"
+    # the fragment is not singly-rooted, so wrap it in one synthetic root before parsing
+    root = xml.fromstring(f"<Runs>{runxml}</Runs>")
+    for run in root.iter("Run"):
+        attrs = run.attrib
+        # plain indexing on purpose: a Run without acc/total_spots/total_bases raises KeyError
+        yield dict(sra_run_accession=attrs["acc"], total_spots=attrs["total_spots"], total_bases=attrs["total_bases"])
+
+
+
+def main(starting_bioproject):
+    "Resolve a BioProject accession to its SRA runs; write metadata.tsv and accessions.txt (exits 1 when not found)"
+    rows = []
+    response = requests.get(esearch, params=dict(db="bioproject", term=starting_bioproject, field="PRJA", format="json"))
+    response.raise_for_status()
+    result = response.json().get("esearchresult", {})
+    try:
+        bioproject_id = result["idlist"][0]
+    except (KeyError, IndexError):
+        # look the warning up defensively so the error report itself cannot fail with a KeyError
+        messages = result.get("warninglist", {}).get("outputmessages")
+        logger.error(f"No results found for '{starting_bioproject}'. Error was \"{messages}\"")
+        sys.exit(1)
+    log(f"Found UID {bioproject_id} for '{starting_bioproject}'")
+    # one row per (biosample, run): the sample's fields are copied onto each of its runs
+    for bioproject, biosample, biosample_xml in resolve_bioproject_ids_and_links([(starting_bioproject, bioproject_id)]):
+        try:
+            sampledict = flatten_biosample_xml(biosample_xml)
+        except KeyError:
+            log(biosample_xml)
+            raise
+        sampledict["bioproject"] = bioproject
+        # biosample is a single UID string: wrap it so it is not comma-joined character by character
+        for sra, runs in yield_sra_runs_from_sample([biosample]):
+            for run in (flatten_runs(runs.strip()) if runs else ()):  # summaries without a "runs" value contribute no rows
+                run.update(sampledict)
+                rows.append(run)
+
+    log(f"Writing {len(rows)} rows to metadata.tsv")
+
+    # union of every row's keys, since rows need not all carry the same attributes
+    header = sorted({key for row in rows for key in row}, key=hso)
+    logger.info(f"Header: {header}")
+
+    rows.sort(key=lambda x: x["biosample_accession"] or "")  # accession may be None; None and str do not compare
+
+    with open("metadata.tsv", "w", newline="") as f:  # the csv module expects files opened with newline=""
+        writer = csv.DictWriter(f, fieldnames=header, delimiter="\t", dialect="excel")
+        writer.writeheader()
+        writer.writerows(rows)
+
+    log(f"Writing {len(rows)} accessions to accessions.txt")
+
+    with open("accessions.txt", "w") as f:
+        f.writelines(row["sra_run_accession"] + "\n" for row in rows)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:  # exit with a usage hint (status 1) instead of an IndexError traceback
+        sys.exit("usage: bio2srr.py BIOPROJECT_ACCESSION")
+    b = sys.argv[1].strip()
+    log(f"Starting with {b}")
+    try:
+        main(b)
+    except requests.HTTPError as e:
+        logger.error(e)
+        sys.exit(1)
+
+
+
diff -r 80f1001797c7 -r 2d4a2159c74b bio2srr.xml
--- a/bio2srr.xml Wed Oct 27 05:00:45 2021 -0400
+++ b/bio2srr.xml Fri May 03 01:17:43 2024 -0400
@@ -1,35 +1,31 @@
-
- Retrieve SRR accessions and sample metadata from BioProject or BioSample.
+
+ Retrieve SRR accessions and sample metadata from BioProject. Recursively follows links to subprojects.
- python
- requests
+ python
+ requests
"$output"
- #else
- $__tool_directory__/bio2srr.py "$input1" > "$output"
- #end if
+ python $__tool_directory__/bio2srr.py "$input1"
]]>
-
-
+
-
+
-
-
+
+
+
+
+
diff -r 80f1001797c7 -r 2d4a2159c74b test-data/accessions.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/accessions.txt Fri May 03 01:17:43 2024 -0400
@@ -0,0 +1,91 @@
+SRR288080
+SRR005375
+SRR005372
+SRR000090
+SRR000091
+SRR000092
+SRR000093
+SRR000094
+SRR000095
+SRR000078
+SRR000079
+SRR000072
+SRR000073
+SRR000074
+SRR000075
+SRR000076
+SRR000077
+SRR000070
+SRR000071
+SRR288080
+SRR000090
+SRR000091
+SRR000092
+SRR000093
+SRR000094
+SRR000095
+SRR000078
+SRR000079
+SRR000070
+SRR000071
+SRR288080
+SRR000090
+SRR000091
+SRR000092
+SRR000093
+SRR000094
+SRR000095
+SRR000080
+SRR000081
+SRR000078
+SRR000079
+SRR000070
+SRR000071
+SRR288080
+SRR005375
+SRR005372
+SRR000090
+SRR000091
+SRR000092
+SRR000093
+SRR000094
+SRR000095
+SRR000078
+SRR000079
+SRR000072
+SRR000073
+SRR000074
+SRR000075
+SRR000076
+SRR000077
+SRR000070
+SRR000071
+SRR000068
+SRR000069
+SRR288080
+SRR000080
+SRR000081
+SRR000078
+SRR000079
+SRR000068
+SRR000069
+SRR000066
+SRR000067
+SRR288080
+SRR287817
+SRR000089
+SRR000088
+SRR000087
+SRR000086
+SRR000085
+SRR000084
+SRR000083
+SRR000082
+SRR000080
+SRR000081
+SRR000078
+SRR000079
+SRR000068
+SRR000069
+SRR000066
+SRR000067
diff -r 80f1001797c7 -r 2d4a2159c74b test-data/metadata.tsv
--- a/test-data/metadata.tsv Wed Oct 27 05:00:45 2021 -0400
+++ b/test-data/metadata.tsv Fri May 03 01:17:43 2024 -0400
@@ -1,3 +1,92 @@
-sample biosample_accession isolation_source collection_date geo_loc_name lat_lon rel_to_oxygen samp_collect_device samp_mat_process samp_size source_material_id BioSampleModel ENA-SPOT-COUNT ENA-BASE-COUNT ENA-FIRST-PUBLIC ENA-LAST-UPDATE
-SRR11671300 SAMN14820590 marine sediment Not applicable North Sea: German Bight (Helgoland Mud Area) 54.052300 N 7.580400 E obligate anaerobe anoxic sampling of sediment slurry incubations at defined time points Previously collected sediments from Helgoland mud area, stored at 4ºC (near in-situ temperature) until use as starting material for incubation experiments. For the incubation experiments, anaerobic slurries were prepared in ratio 1:4 and incubated at 30ºC. Sample name identifies the unique information of each sample including the target 16S rRNA gene, incubation timepoint and state of the enrichment. 1 ml slurry in triplicates, Pooled DNA from triplicates were sequenced as 1 sample. sediment incubation at 30ºC without any amendment sampled after 105 days sequenced with bacteria PCR primers Metagenome or environmental 41855 6160440 2020-05-04 2020-05-04
-SRR11671283 SAMN14820597 marine sediment Not applicable North Sea: German Bight (Helgoland Mud Area) 54.052300 N 7.580400 E obligate anaerobe anoxic sampling of sediment slurry incubations at defined time points Previously collected sediments from Helgoland mud area, stored at 4ºC (near in-situ temperature) until use as starting material for incubation experiments. For the incubation experiments, anaerobic slurries were prepared in ratio 1:4 and incubated at 30ºC. Sample name identifies the unique information of each sample including the target 16S rRNA gene, incubation timepoint and state of the enrichment. 1 ml slurry in triplicates, Pooled DNA from triplicates were sequenced as 1 sample. sediment incubation at 30ºC without any amendment sampled after 105 days sequenced with archaea PCR primers Metagenome or environmental 19833 2973256 2020-05-04 2020-05-04
+bioproject biosample_accession organism taxid package Genus ProjectAccession PublicAccession Species attribute_package collected_by collection_date geo_loc_name isolate isolate_name_alias isolation_source lat_lon project_name sequenced_by sra_run_accession strain total_bases total_spots
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 ECP19-2498 1008246 3835
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR005375 ECP19-2498 63477063 237172
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR005372 ECP19-2498 21805775 88278
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000090 ECP19-2498 59522375 222843
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000091 ECP19-2498 392964 1467
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000092 ECP19-2498 872292 3261
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000093 ECP19-2498 60878431 227850
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000094 ECP19-2498 1311175 4908
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000095 ECP19-2498 592711 2214
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 ECP19-2498 35726106 136244
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 ECP19-2498 33865731 128606
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000072 ECP19-2498 43110538 164772
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000073 ECP19-2498 834018 3206
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000074 ECP19-2498 1191933 4540
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000075 ECP19-2498 817514 3107
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000076 ECP19-2498 53028372 201721
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000077 ECP19-2498 322254 1226
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000070 ECP19-2498 69214301 262057
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000071 ECP19-2498 56794062 215192
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 ECP19-598 1008246 3835
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000090 ECP19-598 59522375 222843
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000091 ECP19-598 392964 1467
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000092 ECP19-598 872292 3261
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000093 ECP19-598 60878431 227850
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000094 ECP19-598 1311175 4908
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000095 ECP19-598 592711 2214
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 ECP19-598 35726106 136244
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 ECP19-598 33865731 128606
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000070 ECP19-598 69214301 262057
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000071 ECP19-598 56794062 215192
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 ECP19-798 1008246 3835
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000090 ECP19-798 59522375 222843
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000091 ECP19-798 392964 1467
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000092 ECP19-798 872292 3261
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000093 ECP19-798 60878431 227850
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000094 ECP19-798 1311175 4908
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000095 ECP19-798 592711 2214
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000080 ECP19-798 42230342 158320
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000081 ECP19-798 48201615 180220
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 ECP19-798 35726106 136244
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 ECP19-798 33865731 128606
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000070 ECP19-798 69214301 262057
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000071 ECP19-798 56794062 215192
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 ECP19-198 1008246 3835
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR005375 ECP19-198 63477063 237172
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR005372 ECP19-198 21805775 88278
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000090 ECP19-198 59522375 222843
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000091 ECP19-198 392964 1467
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000092 ECP19-198 872292 3261
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000093 ECP19-198 60878431 227850
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000094 ECP19-198 1311175 4908
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000095 ECP19-198 592711 2214
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 ECP19-198 35726106 136244
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 ECP19-198 33865731 128606
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000072 ECP19-198 43110538 164772
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000073 ECP19-198 834018 3206
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000074 ECP19-198 1191933 4540
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000075 ECP19-198 817514 3107
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000076 ECP19-198 53028372 201721
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000077 ECP19-198 322254 1226
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000070 ECP19-198 69214301 262057
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000071 ECP19-198 56794062 215192
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000068 ECP19-198 63395546 247135
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000069 ECP19-198 57476129 224837
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 CJP19-D445 1008246 3835
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000080 CJP19-D445 42230342 158320
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000081 CJP19-D445 48201615 180220
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 CJP19-D445 35726106 136244
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 CJP19-D445 33865731 128606
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000068 CJP19-D445 63395546 247135
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000069 CJP19-D445 57476129 224837
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000066 CJP19-D445 63790620 242673
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000067 CJP19-D445 66936400 255351
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 CJP19-D996 1008246 3835
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR287817 CJP19-D996 155025677 1414888
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000089 CJP19-D996 64994909 250945
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000088 CJP19-D996 62912540 242861
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000087 CJP19-D996 1051130 4049
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000086 CJP19-D996 525756 2047
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000085 CJP19-D996 436118 1684
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000084 CJP19-D996 466139 1803
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000083 CJP19-D996 1251016 4841
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000082 CJP19-D996 1227889 4753
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000080 CJP19-D996 42230342 158320
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000081 CJP19-D996 48201615 180220
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 CJP19-D996 35726106 136244
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 CJP19-D996 33865731 128606
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000068 CJP19-D996 63395546 247135
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000069 CJP19-D996 57476129 224837
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000066 CJP19-D996 63790620 242673
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000067 CJP19-D996 66936400 255351
diff -r 80f1001797c7 -r 2d4a2159c74b test-data/metadata.tsv.bak
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metadata.tsv.bak Fri May 03 01:17:43 2024 -0400
@@ -0,0 +1,92 @@
+bioproject biosample_accession organism taxid package total_bases lat_lon geo_loc_name collection_date isolate_name_alias isolate sra_run_accession Species sequenced_by strain isolation_source attribute_package project_name total_spots ProjectAccession collected_by PublicAccession Genus
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1008246 missing USA 2019 CFSAN091029 SRR288080 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 3835 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 63477063 missing USA 2019 CFSAN091029 SRR005375 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 237172 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 21805775 missing USA 2019 CFSAN091029 SRR005372 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 88278 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 59522375 missing USA 2019 CFSAN091029 SRR000090 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 222843 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 392964 missing USA 2019 CFSAN091029 SRR000091 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 1467 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 872292 missing USA 2019 CFSAN091029 SRR000092 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 3261 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 60878431 missing USA 2019 CFSAN091029 SRR000093 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 227850 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1311175 missing USA 2019 CFSAN091029 SRR000094 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 4908 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 592711 missing USA 2019 CFSAN091029 SRR000095 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 2214 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 35726106 missing USA 2019 CFSAN091029 SRR000078 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 136244 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 33865731 missing USA 2019 CFSAN091029 SRR000079 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 128606 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 43110538 missing USA 2019 CFSAN091029 SRR000072 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 164772 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 834018 missing USA 2019 CFSAN091029 SRR000073 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 3206 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1191933 missing USA 2019 CFSAN091029 SRR000074 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 4540 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 817514 missing USA 2019 CFSAN091029 SRR000075 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 3107 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 53028372 missing USA 2019 CFSAN091029 SRR000076 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 201721 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 322254 missing USA 2019 CFSAN091029 SRR000077 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 1226 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 69214301 missing USA 2019 CFSAN091029 SRR000070 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 262057 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 56794062 missing USA 2019 CFSAN091029 SRR000071 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 215192 PRJNA681235 CDC CFSAN091029 Escherichia
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1008246 missing USA 2019 CFSAN091027 SRR288080 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 3835 PRJNA681235 CDC CFSAN091027 Escherichia
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 59522375 missing USA 2019 CFSAN091027 SRR000090 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 222843 PRJNA681235 CDC CFSAN091027 Escherichia
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 392964 missing USA 2019 CFSAN091027 SRR000091 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 1467 PRJNA681235 CDC CFSAN091027 Escherichia
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 872292 missing USA 2019 CFSAN091027 SRR000092 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 3261 PRJNA681235 CDC CFSAN091027 Escherichia
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 60878431 missing USA 2019 CFSAN091027 SRR000093 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 227850 PRJNA681235 CDC CFSAN091027 Escherichia
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1311175 missing USA 2019 CFSAN091027 SRR000094 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 4908 PRJNA681235 CDC CFSAN091027 Escherichia
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 592711 missing USA 2019 CFSAN091027 SRR000095 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 2214 PRJNA681235 CDC CFSAN091027 Escherichia
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 35726106 missing USA 2019 CFSAN091027 SRR000078 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 136244 PRJNA681235 CDC CFSAN091027 Escherichia
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 33865731 missing USA 2019 CFSAN091027 SRR000079 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 128606 PRJNA681235 CDC CFSAN091027 Escherichia
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 69214301 missing USA 2019 CFSAN091027 SRR000070 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 262057 PRJNA681235 CDC CFSAN091027 Escherichia
+PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 56794062 missing USA 2019 CFSAN091027 SRR000071 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 215192 PRJNA681235 CDC CFSAN091027 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1008246 missing USA 2019 CFSAN091028 SRR288080 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 3835 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 59522375 missing USA 2019 CFSAN091028 SRR000090 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 222843 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 392964 missing USA 2019 CFSAN091028 SRR000091 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 1467 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 872292 missing USA 2019 CFSAN091028 SRR000092 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 3261 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 60878431 missing USA 2019 CFSAN091028 SRR000093 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 227850 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1311175 missing USA 2019 CFSAN091028 SRR000094 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 4908 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 592711 missing USA 2019 CFSAN091028 SRR000095 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 2214 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 42230342 missing USA 2019 CFSAN091028 SRR000080 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 158320 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 48201615 missing USA 2019 CFSAN091028 SRR000081 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 180220 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 35726106 missing USA 2019 CFSAN091028 SRR000078 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 136244 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 33865731 missing USA 2019 CFSAN091028 SRR000079 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 128606 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 69214301 missing USA 2019 CFSAN091028 SRR000070 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 262057 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 56794062 missing USA 2019 CFSAN091028 SRR000071 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 215192 PRJNA681235 CDC CFSAN091028 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1008246 missing USA 2019 CFSAN091030 SRR288080 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 3835 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 63477063 missing USA 2019 CFSAN091030 SRR005375 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 237172 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 21805775 missing USA 2019 CFSAN091030 SRR005372 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 88278 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 59522375 missing USA 2019 CFSAN091030 SRR000090 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 222843 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 392964 missing USA 2019 CFSAN091030 SRR000091 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 1467 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 872292 missing USA 2019 CFSAN091030 SRR000092 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 3261 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 60878431 missing USA 2019 CFSAN091030 SRR000093 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 227850 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1311175 missing USA 2019 CFSAN091030 SRR000094 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 4908 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 592711 missing USA 2019 CFSAN091030 SRR000095 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 2214 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 35726106 missing USA 2019 CFSAN091030 SRR000078 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 136244 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 33865731 missing USA 2019 CFSAN091030 SRR000079 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 128606 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 43110538 missing USA 2019 CFSAN091030 SRR000072 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 164772 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 834018 missing USA 2019 CFSAN091030 SRR000073 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 3206 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1191933 missing USA 2019 CFSAN091030 SRR000074 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 4540 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 817514 missing USA 2019 CFSAN091030 SRR000075 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 3107 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 53028372 missing USA 2019 CFSAN091030 SRR000076 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 201721 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 322254 missing USA 2019 CFSAN091030 SRR000077 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 1226 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 69214301 missing USA 2019 CFSAN091030 SRR000070 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 262057 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 56794062 missing USA 2019 CFSAN091030 SRR000071 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 215192 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 63395546 missing USA 2019 CFSAN091030 SRR000068 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 247135 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 57476129 missing USA 2019 CFSAN091030 SRR000069 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 224837 PRJNA681235 CDC CFSAN091030 Escherichia
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 1008246 missing missing missing CFSAN091031 SRR288080 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 3835 CDC
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 42230342 missing missing missing CFSAN091031 SRR000080 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 158320 CDC
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 48201615 missing missing missing CFSAN091031 SRR000081 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 180220 CDC
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 35726106 missing missing missing CFSAN091031 SRR000078 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 136244 CDC
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 33865731 missing missing missing CFSAN091031 SRR000079 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 128606 CDC
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 63395546 missing missing missing CFSAN091031 SRR000068 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 247135 CDC
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 57476129 missing missing missing CFSAN091031 SRR000069 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 224837 CDC
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 63790620 missing missing missing CFSAN091031 SRR000066 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 242673 CDC
+PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 66936400 missing missing missing CFSAN091031 SRR000067 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 255351 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 1008246 missing missing missing CFSAN091032 SRR288080 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 3835 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 155025677 missing missing missing CFSAN091032 SRR287817 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 1414888 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 64994909 missing missing missing CFSAN091032 SRR000089 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 250945 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 62912540 missing missing missing CFSAN091032 SRR000088 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 242861 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 1051130 missing missing missing CFSAN091032 SRR000087 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 4049 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 525756 missing missing missing CFSAN091032 SRR000086 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 2047 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 436118 missing missing missing CFSAN091032 SRR000085 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 1684 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 466139 missing missing missing CFSAN091032 SRR000084 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 1803 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 1251016 missing missing missing CFSAN091032 SRR000083 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 4841 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 1227889 missing missing missing CFSAN091032 SRR000082 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 4753 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 42230342 missing missing missing CFSAN091032 SRR000080 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 158320 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 48201615 missing missing missing CFSAN091032 SRR000081 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 180220 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 35726106 missing missing missing CFSAN091032 SRR000078 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 136244 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 33865731 missing missing missing CFSAN091032 SRR000079 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 128606 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 63395546 missing missing missing CFSAN091032 SRR000068 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 247135 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 57476129 missing missing missing CFSAN091032 SRR000069 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 224837 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 63790620 missing missing missing CFSAN091032 SRR000066 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 242673 CDC
+PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 66936400 missing missing missing CFSAN091032 SRR000067 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 255351 CDC
diff -r 80f1001797c7 -r 2d4a2159c74b test-data/test.txt
--- a/test-data/test.txt Wed Oct 27 05:00:45 2021 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-SRR11671300
-SRR11671283
diff -r 80f1001797c7 -r 2d4a2159c74b tests.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests.py Fri May 03 01:17:43 2024 -0400
@@ -0,0 +1,56 @@
+"""Unit tests for the helpers exported by bio2srr."""
+
+import pytest
+
+from bio2srr import *
+
+
+def test_element_tree_xpath():
+    """The ``.//Id[@db='BioSample']`` query matches a node in the example XML."""
+    from xml.etree import ElementTree as xml
+    root = xml.fromstring(biosample_example)
+    assert root.find(".//Id[@db='BioSample']") is not None
+
+
+def test_flatten_biosample_xml():
+    """The flattened example exposes accession, organism and isolate by key."""
+    d = flatten_biosample_xml(biosample_example)
+    assert d['biosample_accession'] == 'SAMN17131268'
+    assert d['organism'] == 'Campylobacter jejuni'
+    assert d['isolate'] == 'CFSAN091032'
+
+
+def test_flatten_runs():
+    """flatten_runs yields exactly two items for the bundled runs example."""
+    d = list(flatten_runs(runs_example))
+    assert len(d) == 2
+
+
+def test_header_sort_override_consistency():
+    """Sorting with ``key=hso`` gives the same order for every input ordering.
+
+    All 5! = 120 permutations are checked, so the test is deterministic and a
+    failure reports the exact ordering that sorted differently.
+    """
+    from itertools import permutations
+
+    headers = ["C", "B", "A", "taxid", "bioproject"]
+    expected = sorted(headers, key=hso)
+    # NOTE(review): an earlier revision carried a commented-out check that
+    # expected[0] == "bioproject"; confirm the intended position before
+    # reinstating it.
+    for ordering in permutations(headers):
+        assert sorted(ordering, key=hso) == expected, ordering
+
+
+def test_hso_override():
+    """"bioproject" compares before "taxid"; a name equals itself (0)."""
+    assert header_sort_override("bioproject", "taxid") < 0
+    assert header_sort_override("taxid", "bioproject") > 0
+    assert header_sort_override("taxid", "taxid") == 0
+
+
+def test_hso_regular():
+    """"A" compares before "B", and "B" after "A"."""
+    assert header_sort_override("A", "B") < 0
+    assert header_sort_override("B", "A") > 0