Mercurial > repos > jpayne > bioproject_to_srr_2
changeset 4:2d4a2159c74b
planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
author | jpayne |
---|---|
date | Fri, 03 May 2024 01:17:43 -0400 |
parents | 80f1001797c7 |
children | 1a3038b6d1dd |
files | __pycache__/bio2srr.cpython-312.pyc __pycache__/tests.cpython-312-pytest-8.2.0.pyc bio2srr.py bio2srr.xml test-data/accessions.txt test-data/metadata.tsv test-data/metadata.tsv.bak test-data/test.txt tests.py |
diffstat | 9 files changed, 563 insertions(+), 147 deletions(-) [+] |
line wrap: on
line diff
"""Grab SRR numbers from Bioprojects and sub-bioprojects via Eutils.

Given a BioProject accession, walk the project and its linked sub-projects,
collect every linked BioSample, look up the SRA runs for each sample, and
write two files into the working directory:

* ``metadata.tsv``   - one row per SRA run, joined with its sample metadata
* ``accessions.txt`` - one SRA run accession per line
"""

import csv
import logging
import sys
from functools import cmp_to_key
from itertools import batched
from time import sleep
from xml.etree import ElementTree as xml

import requests

esearch = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
esummary = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
elink = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi"

# Seconds to wait for any single Eutils response before giving up.
REQUEST_TIMEOUT = 60

# Columns that are pinned, in this order, to the front of metadata.tsv.
# "sra_run_accession" is the key flatten_runs() actually emits;
# "srr_accession" is retained so any caller that used that name still sorts
# the same way.
PRIORITY_HEADERS = (
    "bioproject",
    "sra_run_accession",
    "srr_accession",
    "biosample_accession",
    "organism",
    "taxid",
    "package",
)

logging.basicConfig(level=logging.INFO)

logger = logging.getLogger("bio2srr")


def log(msg):
    """Log *msg* at INFO level on the module logger."""
    logger.info(msg)  # fix logging later


def get_tag(root, tag):
    """Return the text of the first element matching *tag* under *root*.

    Logs a message and returns ``None`` when nothing matches.
    """
    val = root.find(tag)
    if val is not None:
        return val.text
    log(f"No result for {tag}")


def header_sort_override(a, b):
    """Compare two column names: priority headers first, then alphabetical.

    Returns -1, 0 or 1 in the style expected by ``functools.cmp_to_key``.
    """
    if a == b:
        return 0
    # Walk the priority list in order; whichever name is hit first wins.
    for name in PRIORITY_HEADERS:
        if a == name:
            return -1
        if b == name:
            return 1
    return -1 if a < b else 1


hso = cmp_to_key(header_sort_override)


def _get_json(url, **params):
    """GET *url* with *params* (plus ``format=json``) and return the parsed body.

    Raises ``requests.HTTPError`` on a non-2xx response.
    """
    response = requests.get(
        url, params=dict(params, format="json"), timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response.json()


def _linksetdbs(reply):
    """Return the ``linksetdbs`` list of the first linkset of an elink reply.

    Returns an empty list when the reply has no linksets or no linksetdbs,
    including the case where the key is present but the list is empty.
    """
    linksets = reply.get("linksets") or [{}]
    return linksets[0].get("linksetdbs") or []


def resolve_bioproject_ids_and_links(bioproject_id_list):
    """Follow bioproject and biosample links from the given starting projects.

    *bioproject_id_list* is a list of ``(accession, uid)`` pairs. Linked
    sub-projects are appended to an internal work queue as they are
    discovered, so the walk is effectively recursive. Each project UID is
    processed at most once.

    Yields ``(bioproject_accession, biosample_uid, biosample_xml)`` tuples.
    """
    # Work on a copy so the caller's list is not mutated; appending to the
    # list being enumerated is what drives the traversal.
    queue = list(bioproject_id_list)
    seen = {str(uid) for _, uid in queue}
    for i, (bioproject, bioproject_id) in enumerate(queue):
        log(f"Processing {bioproject} ({bioproject_id}) {i+1}/{len(queue)}")
        # get bioproject to bioproject links
        project_dbs = _linksetdbs(
            _get_json(elink, db="bioproject", dbfrom="bioproject", id=bioproject_id)
        )
        if len(project_dbs) >= 3:
            # third index is the up to down links
            for child_id in project_dbs[2].get("links", []):
                child_id = str(child_id)
                if child_id in seen:
                    continue  # already queued; also guards against link cycles
                seen.add(child_id)
                summary = _get_json(esummary, id=child_id, db="bioproject")
                child_acc = summary["result"][child_id]["project_acc"]
                queue.append((child_acc, child_id))  # recurse over bioproject links
        # get bioproject to biosample links
        sample_dbs = _linksetdbs(
            _get_json(elink, db="biosample", dbfrom="bioproject", id=bioproject_id)
        )
        links = sample_dbs[0].get("links", []) if sample_dbs else []
        log(f"Found {len(links)} biosample links for {bioproject} ({bioproject_id})")
        for ids in batched(links, 200):
            reply = _get_json(esummary, id=",".join(map(str, ids)), db="biosample")
            for uid, value in reply.get("result", {}).items():
                if uid == "uids":
                    continue  # index entry listing the UIDs, not a record
                yield bioproject, uid, value["sampledata"]  # this is XML
            sleep(0.1)


biosample_example = """
<BioSample access="public" publication_date="2020-12-21T00:00:00.000" last_update="2022-06-23T17:45:35.674" submission_date="2020-12-21T15:08:05.690" id="17131268" accession="SAMN17131268">
  <Ids>
    <Id db="BioSample" is_primary="1">SAMN17131268</Id>
    <Id db_label="Sample name">CJP19-D996</Id>
  </Ids>
  <Description>
    <Title>Pathogen: environmental/food/other sample from Campylobacter jejuni</Title>
    <Organism taxonomy_id="197" taxonomy_name="Campylobacter jejuni">
      <OrganismName>Campylobacter jejuni</OrganismName>
    </Organism>
  </Description>
  <Owner>
    <Name url="http://www.fda.gov/Food/FoodScienceResearch/WholeGenomeSequencingProgramWGS/default.htm" abbreviation="CFSAN">FDA Center for Food Safety and Applied Nutrition</Name>
  </Owner>
  <Models>
    <Model>Pathogen.env</Model>
  </Models>
  <Package display_name="Pathogen: environmental/food/other; version 1.0">Pathogen.env.1.0</Package>
  <Attributes>
    <Attribute attribute_name="strain" harmonized_name="strain" display_name="strain">CJP19-D996</Attribute>
    <Attribute attribute_name="collection_date" harmonized_name="collection_date" display_name="collection date">missing</Attribute>
    <Attribute attribute_name="geo_loc_name" harmonized_name="geo_loc_name" display_name="geographic location">missing</Attribute>
    <Attribute attribute_name="collected_by" harmonized_name="collected_by" display_name="collected by">CDC</Attribute>
    <Attribute attribute_name="lat_lon" harmonized_name="lat_lon" display_name="latitude and longitude">missing</Attribute>
    <Attribute attribute_name="isolation_source" harmonized_name="isolation_source" display_name="isolation source">missing</Attribute>
    <Attribute attribute_name="isolate" harmonized_name="isolate" display_name="isolate">CFSAN091032</Attribute>
    <Attribute attribute_name="project name" harmonized_name="project_name" display_name="project name">GenomeTrakr</Attribute>
    <Attribute attribute_name="sequenced by" harmonized_name="sequenced_by" display_name="sequenced by">FDA Center for Food Safety and Applied Nutrition</Attribute>
  </Attributes>
  <Links>
    <Link type="entrez" target="bioproject" label="PRJNA681235">681235</Link>
  </Links>
  <Status status="live" when="2020-12-21T15:08:05.693"/>
</BioSample>

"""


def flatten_biosample_xml(biosampxml):
    """Flatten one BioSample XML document into a single-level dict.

    The dict always has ``biosample_accession``, ``organism``, ``taxid`` and
    ``package`` (each ``None`` when absent from the XML), plus one entry per
    ``<Attribute>`` keyed by its ``harmonized_name``, falling back to
    ``attribute_name``. See ``biosample_example`` for the expected layout.

    Raises ``xml.etree.ElementTree.ParseError`` if the input is not
    well-formed XML.
    """
    root = xml.fromstring(biosampxml)
    organism_node = root.find(".//Organism")
    sampledict = dict(
        biosample_accession=get_tag(root, './/Id[@db="BioSample"]'),
        organism=get_tag(root, ".//OrganismName"),
        taxid=(
            organism_node.attrib.get("taxonomy_id")
            if organism_node is not None
            else None
        ),
        package=get_tag(root, ".//Package"),
    )
    for attribute in root.findall("Attributes/Attribute"):
        # Look the fallback up lazily: most attributes carry a harmonized
        # name, and one lacking "attribute_name" must not raise KeyError.
        name = attribute.attrib.get("harmonized_name") or attribute.attrib.get(
            "attribute_name"
        )
        if name is None:
            log("Skipping BioSample attribute with no name")
            continue
        sampledict[name] = attribute.text

    return sampledict


def yield_sra_runs_from_sample(biosampleids):
    """Yield ``(sra_uid, runs_xml)`` for the SRA records linked to BioSamples.

    *biosampleids* may be a single BioSample UID string or an iterable of
    UIDs. A bare string is treated as ONE id; joining its characters with
    commas would query unrelated single-digit UIDs. ``runs_xml`` is whatever
    esummary reports under ``runs`` and may be ``None``.
    """
    if isinstance(biosampleids, (str, int)):
        biosampleids = [biosampleids]
    sleep(0.1)
    reply = _get_json(
        elink, id=",".join(map(str, biosampleids)), dbfrom="biosample", db="sra"
    )
    dbs = _linksetdbs(reply)
    links = dbs[0].get("links", []) if dbs else []
    for ids in batched(links, 200):
        sleep(0.1)
        summary = _get_json(esummary, id=",".join(map(str, ids)), db="sra")
        for uid, value in summary.get("result", {}).items():
            if uid == "uids":
                continue  # index entry listing the UIDs, not a record
            yield uid, value.get("runs")


runs_example = """
<Run acc="SRR13167188" total_spots="827691" total_bases="385043067" load_done="true" is_public="true" cluster_name="public" static_data_available="true"/>
<Run acc="SRR13167189" total_spots="827691" total_bases="385043067" load_done="true" is_public="true" cluster_name="public" static_data_available="true"/>
"""


def flatten_runs(runxml):
    """Yield one dict per ``<Run>`` element in an esummary ``runs`` fragment.

    Each dict has ``sra_run_accession``, ``total_spots`` and ``total_bases``;
    the two totals are ``None`` when the attribute is absent. An empty or
    ``None`` fragment yields nothing. See ``runs_example`` for the layout.
    """
    if not runxml:
        return
    # The fragment is a sequence of sibling <Run/> elements with no single
    # root, so wrap it before parsing.
    root = xml.fromstring(f"<data>{runxml}</data>")
    for run in root.findall(".//Run"):
        yield dict(
            sra_run_accession=run.attrib["acc"],
            total_spots=run.attrib.get("total_spots"),
            total_bases=run.attrib.get("total_bases"),
        )


def main(starting_bioproject):
    """Resolve *starting_bioproject* and write metadata.tsv and accessions.txt.

    Exits with status 1 when the accession cannot be found. Network errors
    from ``requests`` propagate to the caller.
    """
    rows = []
    reply = _get_json(
        esearch, db="bioproject", term=starting_bioproject, field="PRJA"
    )
    try:
        bioproject_id = reply["esearchresult"]["idlist"][0]
    except (KeyError, IndexError):
        # The warning list is optional; do not let its absence mask the
        # real "not found" error.
        messages = (
            reply.get("esearchresult", {})
            .get("warninglist", {})
            .get("outputmessages")
        )
        logger.error(f"No results found for '{starting_bioproject}'. Error was \"{messages}\"")
        sys.exit(1)
    log(f"Found UID {bioproject_id} for '{starting_bioproject}'")

    starting_points = [(starting_bioproject, bioproject_id)]
    for bioproject, biosample, biosample_xml in resolve_bioproject_ids_and_links(starting_points):
        try:
            sampledict = flatten_biosample_xml(biosample_xml)
        except (KeyError, xml.ParseError):
            log(biosample_xml)  # dump the offending document before failing
            raise
        sampledict["bioproject"] = bioproject
        # Pass the UID as a one-element list: one sample, one lookup.
        for sra, runs in yield_sra_runs_from_sample([biosample]):
            for run in flatten_runs((runs or "").strip()):
                run.update(sampledict)
                rows.append(run)

    log(f"Writing {len(rows)} rows to metadata.tsv")

    header = sorted({key for row in rows for key in row}, key=hso)
    logger.info(f"Header: {header}")

    # Accession can be None if the XML lacked it; sort those first instead
    # of raising TypeError on a None/str comparison.
    rows.sort(key=lambda row: row["biosample_accession"] or "")

    # newline="" lets the csv module control line endings itself.
    with open("metadata.tsv", "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=header, delimiter="\t", dialect="excel")
        writer.writeheader()
        writer.writerows(rows)

    log(f"Writing {len(rows)} accessions to accessions.txt")

    with open("accessions.txt", "w", encoding="utf-8") as f:
        f.writelines(row["sra_run_accession"] + "\n" for row in rows)


if __name__ == "__main__":
    if len(sys.argv) < 2 or not sys.argv[1].strip():
        logger.error("Usage: bio2srr.py <BioProject accession>")
        sys.exit(2)
    b = sys.argv[1].strip()
    log(f"Starting with {b}")
    try:
        main(b)
    except requests.RequestException as e:  # HTTP errors, timeouts, connection failures
        logger.error(e)
        sys.exit(1)
--- a/bio2srr.xml Wed Oct 27 05:00:45 2021 -0400 +++ b/bio2srr.xml Fri May 03 01:17:43 2024 -0400 @@ -1,35 +1,31 @@ -<tool id="bio2srr" name="Bioproject to SRR" version="1.0.0"> - <description>Retrieve SRR accessions and sample metadata from BioProject or BioSample.</description> +<tool id="bio2srr" name="Bioproject to SRR" version="3.0.0"> + <description>Retrieve SRR accessions and sample metadata from BioProject. Recursively follows links to subprojects.</description> <requirements> - <requirement type="package" version="3.8">python</requirement> - <requirement type="package" version="2.24.0">requests</requirement> + <requirement type="package" version="3.12">python</requirement> + <requirement type="package" version="2.31.0">requests</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ - #if $src_metadata - $__tool_directory__/bio2srr.py "$input1" $src_metadata > "$output" - #else - $__tool_directory__/bio2srr.py "$input1" > "$output" - #end if + python $__tool_directory__/bio2srr.py "$input1" ]]></command> <inputs> - <param type="text" name="input1" label="BioProject or BioSample" /> - <param type="data" format="tabular" name="src_metadata" label="Table of your own metadata" optional="true" help="Optional metadata table to be folded in." 
/> + <param type="text" name="input1" label="BioProject" /> </inputs> <outputs> - <data format="txt" name="output" label="SRR Accession List" /> + <data format="txt" name="output" from_work_dir="accessions.txt" label="SRR Accession List" /> <data format="tabular" name="metadata" from_work_dir="metadata.tsv" label="Sample Metadata Table" /> </outputs> <tests> <test> - <param name="input1" value="NOTHING" /> - <output name="output" file="test.txt" /> + <param name="input1" value="PRJNA681235" /> + <output name="output" file="accessions.txt" /> <output name="metadata" file="metadata.tsv" /> </test> + <test expect_failure="1"> + <param name="input1" value="NOTHING" /> + </test> </tests> <help><![CDATA[ - Retrieve SRR accessions and metadata from a BioProject. - - If you have metadata for your own samples (that aren't from the BioProject) you can provide a TSV table with it and they'll be folded together. (It's a union, not a join.) Put sample ID's under the header 'sample'. + Retrieve SRR accessions and sample metadata from a BioProject, including subprojects. It's a JOIN, so if a sample is associated with multiple SRA runs, then each run has a copy of the sample's metadata. ]]></help> <citations> </citations>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/accessions.txt Fri May 03 01:17:43 2024 -0400 @@ -0,0 +1,91 @@ +SRR288080 +SRR005375 +SRR005372 +SRR000090 +SRR000091 +SRR000092 +SRR000093 +SRR000094 +SRR000095 +SRR000078 +SRR000079 +SRR000072 +SRR000073 +SRR000074 +SRR000075 +SRR000076 +SRR000077 +SRR000070 +SRR000071 +SRR288080 +SRR000090 +SRR000091 +SRR000092 +SRR000093 +SRR000094 +SRR000095 +SRR000078 +SRR000079 +SRR000070 +SRR000071 +SRR288080 +SRR000090 +SRR000091 +SRR000092 +SRR000093 +SRR000094 +SRR000095 +SRR000080 +SRR000081 +SRR000078 +SRR000079 +SRR000070 +SRR000071 +SRR288080 +SRR005375 +SRR005372 +SRR000090 +SRR000091 +SRR000092 +SRR000093 +SRR000094 +SRR000095 +SRR000078 +SRR000079 +SRR000072 +SRR000073 +SRR000074 +SRR000075 +SRR000076 +SRR000077 +SRR000070 +SRR000071 +SRR000068 +SRR000069 +SRR288080 +SRR000080 +SRR000081 +SRR000078 +SRR000079 +SRR000068 +SRR000069 +SRR000066 +SRR000067 +SRR288080 +SRR287817 +SRR000089 +SRR000088 +SRR000087 +SRR000086 +SRR000085 +SRR000084 +SRR000083 +SRR000082 +SRR000080 +SRR000081 +SRR000078 +SRR000079 +SRR000068 +SRR000069 +SRR000066 +SRR000067
--- a/test-data/metadata.tsv Wed Oct 27 05:00:45 2021 -0400 +++ b/test-data/metadata.tsv Fri May 03 01:17:43 2024 -0400 @@ -1,3 +1,92 @@ -sample biosample_accession isolation_source collection_date geo_loc_name lat_lon rel_to_oxygen samp_collect_device samp_mat_process samp_size source_material_id BioSampleModel ENA-SPOT-COUNT ENA-BASE-COUNT ENA-FIRST-PUBLIC ENA-LAST-UPDATE -SRR11671300 SAMN14820590 marine sediment Not applicable North Sea: German Bight (Helgoland Mud Area) 54.052300 N 7.580400 E obligate anaerobe anoxic sampling of sediment slurry incubations at defined time points Previously collected sediments from Helgoland mud area, stored at 4ºC (near in-situ temperature) until use as starting material for incubation experiments. For the incubation experiments, anaerobic slurries were prepared in ratio 1:4 and incubated at 30ºC. Sample name identifies the unique information of each sample including the target 16S rRNA gene, incubation timepoint and state of the enrichment. 1 ml slurry in triplicates, Pooled DNA from triplicates were sequenced as 1 sample. sediment incubation at 30ºC without any amendment sampled after 105 days sequenced with bacteria PCR primers Metagenome or environmental 41855 6160440 2020-05-04 2020-05-04 -SRR11671283 SAMN14820597 marine sediment Not applicable North Sea: German Bight (Helgoland Mud Area) 54.052300 N 7.580400 E obligate anaerobe anoxic sampling of sediment slurry incubations at defined time points Previously collected sediments from Helgoland mud area, stored at 4ºC (near in-situ temperature) until use as starting material for incubation experiments. For the incubation experiments, anaerobic slurries were prepared in ratio 1:4 and incubated at 30ºC. Sample name identifies the unique information of each sample including the target 16S rRNA gene, incubation timepoint and state of the enrichment. 1 ml slurry in triplicates, Pooled DNA from triplicates were sequenced as 1 sample. 
sediment incubation at 30ºC without any amendment sampled after 105 days sequenced with archaea PCR primers Metagenome or environmental 19833 2973256 2020-05-04 2020-05-04 +bioproject biosample_accession organism taxid package Genus ProjectAccession PublicAccession Species attribute_package collected_by collection_date geo_loc_name isolate isolate_name_alias isolation_source lat_lon project_name sequenced_by sra_run_accession strain total_bases total_spots +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 ECP19-2498 1008246 3835 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR005375 ECP19-2498 63477063 237172 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR005372 ECP19-2498 21805775 88278 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000090 ECP19-2498 59522375 222843 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000091 ECP19-2498 392964 1467 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 
USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000092 ECP19-2498 872292 3261 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000093 ECP19-2498 60878431 227850 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000094 ECP19-2498 1311175 4908 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000095 ECP19-2498 592711 2214 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 ECP19-2498 35726106 136244 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 ECP19-2498 33865731 128606 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000072 ECP19-2498 43110538 164772 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center 
for Food Safety and Applied Nutrition SRR000073 ECP19-2498 834018 3206 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000074 ECP19-2498 1191933 4540 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000075 ECP19-2498 817514 3107 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000076 ECP19-2498 53028372 201721 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000077 ECP19-2498 322254 1226 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000070 ECP19-2498 69214301 262057 +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091029 coli environmental/food/other CDC 2019 USA CFSAN091029 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000071 ECP19-2498 56794062 215192 +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 ECP19-598 
1008246 3835 +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000090 ECP19-598 59522375 222843 +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000091 ECP19-598 392964 1467 +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000092 ECP19-598 872292 3261 +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000093 ECP19-598 60878431 227850 +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000094 ECP19-598 1311175 4908 +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000095 ECP19-598 592711 2214 +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 ECP19-598 35726106 136244 +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 
83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 ECP19-598 33865731 128606 +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000070 ECP19-598 69214301 262057 +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091027 coli environmental/food/other CDC 2019 USA CFSAN091027 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000071 ECP19-598 56794062 215192 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 ECP19-798 1008246 3835 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000090 ECP19-798 59522375 222843 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000091 ECP19-798 392964 1467 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000092 ECP19-798 872292 3261 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli 
environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000093 ECP19-798 60878431 227850 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000094 ECP19-798 1311175 4908 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000095 ECP19-798 592711 2214 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000080 ECP19-798 42230342 158320 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000081 ECP19-798 48201615 180220 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 ECP19-798 35726106 136244 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 ECP19-798 33865731 128606 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing 
missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000070 ECP19-798 69214301 262057 +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091028 coli environmental/food/other CDC 2019 USA CFSAN091028 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000071 ECP19-798 56794062 215192 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 ECP19-198 1008246 3835 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR005375 ECP19-198 63477063 237172 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR005372 ECP19-198 21805775 88278 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000090 ECP19-198 59522375 222843 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000091 ECP19-198 392964 1467 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied 
Nutrition SRR000092 ECP19-198 872292 3261 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000093 ECP19-198 60878431 227850 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000094 ECP19-198 1311175 4908 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000095 ECP19-198 592711 2214 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 ECP19-198 35726106 136244 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 ECP19-198 33865731 128606 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000072 ECP19-198 43110538 164772 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000073 ECP19-198 834018 3206 +PRJNA681235 
SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000074 ECP19-198 1191933 4540 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000075 ECP19-198 817514 3107 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000076 ECP19-198 53028372 201721 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000077 ECP19-198 322254 1226 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000070 ECP19-198 69214301 262057 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000071 ECP19-198 56794062 215192 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000068 ECP19-198 63395546 247135 +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 
Escherichia PRJNA681235 CFSAN091030 coli environmental/food/other CDC 2019 USA CFSAN091030 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000069 ECP19-198 57476129 224837 +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 CJP19-D445 1008246 3835 +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000080 CJP19-D445 42230342 158320 +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000081 CJP19-D445 48201615 180220 +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 CJP19-D445 35726106 136244 +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 CJP19-D445 33865731 128606 +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000068 CJP19-D445 63395546 247135 +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000069 CJP19-D445 57476129 224837 +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000066 CJP19-D445 63790620 242673 +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing 
missing CFSAN091031 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000067 CJP19-D445 66936400 255351 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR288080 CJP19-D996 1008246 3835 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR287817 CJP19-D996 155025677 1414888 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000089 CJP19-D996 64994909 250945 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000088 CJP19-D996 62912540 242861 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000087 CJP19-D996 1051130 4049 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000086 CJP19-D996 525756 2047 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000085 CJP19-D996 436118 1684 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000084 CJP19-D996 466139 1803 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied 
Nutrition SRR000083 CJP19-D996 1251016 4841 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000082 CJP19-D996 1227889 4753 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000080 CJP19-D996 42230342 158320 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000081 CJP19-D996 48201615 180220 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000078 CJP19-D996 35726106 136244 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000079 CJP19-D996 33865731 128606 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000068 CJP19-D996 63395546 247135 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000069 CJP19-D996 57476129 224837 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000066 CJP19-D996 63790620 242673 +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 CDC missing missing CFSAN091032 missing missing GenomeTrakr FDA Center for Food Safety and Applied Nutrition SRR000067 CJP19-D996 66936400 255351
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/metadata.tsv.bak Fri May 03 01:17:43 2024 -0400 @@ -0,0 +1,92 @@ +bioproject biosample_accession organism taxid package total_bases lat_lon geo_loc_name collection_date isolate_name_alias isolate sra_run_accession Species sequenced_by strain isolation_source attribute_package project_name total_spots ProjectAccession collected_by PublicAccession Genus +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1008246 missing USA 2019 CFSAN091029 SRR288080 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 3835 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 63477063 missing USA 2019 CFSAN091029 SRR005375 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 237172 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 21805775 missing USA 2019 CFSAN091029 SRR005372 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 88278 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 59522375 missing USA 2019 CFSAN091029 SRR000090 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 222843 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 392964 missing USA 2019 CFSAN091029 SRR000091 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 1467 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 872292 missing USA 2019 CFSAN091029 SRR000092 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing 
environmental/food/other GenomeTrakr 3261 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 60878431 missing USA 2019 CFSAN091029 SRR000093 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 227850 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1311175 missing USA 2019 CFSAN091029 SRR000094 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 4908 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 592711 missing USA 2019 CFSAN091029 SRR000095 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 2214 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 35726106 missing USA 2019 CFSAN091029 SRR000078 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 136244 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 33865731 missing USA 2019 CFSAN091029 SRR000079 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 128606 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 43110538 missing USA 2019 CFSAN091029 SRR000072 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 164772 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 834018 missing USA 2019 CFSAN091029 SRR000073 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 3206 PRJNA681235 CDC 
CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1191933 missing USA 2019 CFSAN091029 SRR000074 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 4540 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 817514 missing USA 2019 CFSAN091029 SRR000075 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 3107 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 53028372 missing USA 2019 CFSAN091029 SRR000076 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 201721 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 322254 missing USA 2019 CFSAN091029 SRR000077 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 1226 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 69214301 missing USA 2019 CFSAN091029 SRR000070 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 262057 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946945 Escherichia coli O157:H7 83334 Pathogen.env.1.0 56794062 missing USA 2019 CFSAN091029 SRR000071 coli FDA Center for Food Safety and Applied Nutrition ECP19-2498 missing environmental/food/other GenomeTrakr 215192 PRJNA681235 CDC CFSAN091029 Escherichia +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1008246 missing USA 2019 CFSAN091027 SRR288080 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 3835 PRJNA681235 CDC CFSAN091027 Escherichia +PRJNA681235 SAMN16946946 Escherichia 
coli O157:H7 83334 Pathogen.env.1.0 59522375 missing USA 2019 CFSAN091027 SRR000090 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 222843 PRJNA681235 CDC CFSAN091027 Escherichia +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 392964 missing USA 2019 CFSAN091027 SRR000091 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 1467 PRJNA681235 CDC CFSAN091027 Escherichia +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 872292 missing USA 2019 CFSAN091027 SRR000092 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 3261 PRJNA681235 CDC CFSAN091027 Escherichia +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 60878431 missing USA 2019 CFSAN091027 SRR000093 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 227850 PRJNA681235 CDC CFSAN091027 Escherichia +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1311175 missing USA 2019 CFSAN091027 SRR000094 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 4908 PRJNA681235 CDC CFSAN091027 Escherichia +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 592711 missing USA 2019 CFSAN091027 SRR000095 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 2214 PRJNA681235 CDC CFSAN091027 Escherichia +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 35726106 missing USA 2019 CFSAN091027 SRR000078 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 136244 PRJNA681235 CDC CFSAN091027 Escherichia +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 33865731 missing USA 2019 
CFSAN091027 SRR000079 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 128606 PRJNA681235 CDC CFSAN091027 Escherichia +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 69214301 missing USA 2019 CFSAN091027 SRR000070 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 262057 PRJNA681235 CDC CFSAN091027 Escherichia +PRJNA681235 SAMN16946946 Escherichia coli O157:H7 83334 Pathogen.env.1.0 56794062 missing USA 2019 CFSAN091027 SRR000071 coli FDA Center for Food Safety and Applied Nutrition ECP19-598 missing environmental/food/other GenomeTrakr 215192 PRJNA681235 CDC CFSAN091027 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1008246 missing USA 2019 CFSAN091028 SRR288080 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 3835 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 59522375 missing USA 2019 CFSAN091028 SRR000090 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 222843 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 392964 missing USA 2019 CFSAN091028 SRR000091 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 1467 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 872292 missing USA 2019 CFSAN091028 SRR000092 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 3261 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 60878431 missing USA 2019 CFSAN091028 SRR000093 coli FDA Center for Food Safety and 
Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 227850 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1311175 missing USA 2019 CFSAN091028 SRR000094 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 4908 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 592711 missing USA 2019 CFSAN091028 SRR000095 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 2214 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 42230342 missing USA 2019 CFSAN091028 SRR000080 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 158320 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 48201615 missing USA 2019 CFSAN091028 SRR000081 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 180220 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 35726106 missing USA 2019 CFSAN091028 SRR000078 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 136244 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 33865731 missing USA 2019 CFSAN091028 SRR000079 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 128606 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 69214301 missing USA 2019 CFSAN091028 SRR000070 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other 
GenomeTrakr 262057 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16946947 Escherichia coli O157:H7 83334 Pathogen.env.1.0 56794062 missing USA 2019 CFSAN091028 SRR000071 coli FDA Center for Food Safety and Applied Nutrition ECP19-798 missing environmental/food/other GenomeTrakr 215192 PRJNA681235 CDC CFSAN091028 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1008246 missing USA 2019 CFSAN091030 SRR288080 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 3835 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 63477063 missing USA 2019 CFSAN091030 SRR005375 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 237172 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 21805775 missing USA 2019 CFSAN091030 SRR005372 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 88278 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 59522375 missing USA 2019 CFSAN091030 SRR000090 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 222843 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 392964 missing USA 2019 CFSAN091030 SRR000091 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 1467 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 872292 missing USA 2019 CFSAN091030 SRR000092 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 3261 PRJNA681235 CDC CFSAN091030 Escherichia 
+PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 60878431 missing USA 2019 CFSAN091030 SRR000093 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 227850 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 1311175 missing USA 2019 CFSAN091030 SRR000094 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 4908 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 592711 missing USA 2019 CFSAN091030 SRR000095 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 2214 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 35726106 missing USA 2019 CFSAN091030 SRR000078 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 136244 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 33865731 missing USA 2019 CFSAN091030 SRR000079 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 128606 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 43110538 missing USA 2019 CFSAN091030 SRR000072 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 164772 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 834018 missing USA 2019 CFSAN091030 SRR000073 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 3206 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 
Pathogen.env.1.0 1191933 missing USA 2019 CFSAN091030 SRR000074 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 4540 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 817514 missing USA 2019 CFSAN091030 SRR000075 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 3107 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 53028372 missing USA 2019 CFSAN091030 SRR000076 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 201721 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 322254 missing USA 2019 CFSAN091030 SRR000077 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 1226 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 69214301 missing USA 2019 CFSAN091030 SRR000070 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 262057 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 56794062 missing USA 2019 CFSAN091030 SRR000071 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 215192 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 63395546 missing USA 2019 CFSAN091030 SRR000068 coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 247135 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN16956340 Escherichia coli O157:H7 83334 Pathogen.env.1.0 57476129 missing USA 2019 CFSAN091030 SRR000069 
coli FDA Center for Food Safety and Applied Nutrition ECP19-198 missing environmental/food/other GenomeTrakr 224837 PRJNA681235 CDC CFSAN091030 Escherichia +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 1008246 missing missing missing CFSAN091031 SRR288080 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 3835 CDC +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 42230342 missing missing missing CFSAN091031 SRR000080 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 158320 CDC +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 48201615 missing missing missing CFSAN091031 SRR000081 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 180220 CDC +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 35726106 missing missing missing CFSAN091031 SRR000078 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 136244 CDC +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 33865731 missing missing missing CFSAN091031 SRR000079 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 128606 CDC +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 63395546 missing missing missing CFSAN091031 SRR000068 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 247135 CDC +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 57476129 missing missing missing CFSAN091031 SRR000069 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 224837 CDC +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 63790620 missing missing missing CFSAN091031 SRR000066 FDA Center for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 242673 CDC +PRJNA681235 SAMN17131267 Campylobacter jejuni 197 Pathogen.env.1.0 66936400 missing missing missing CFSAN091031 SRR000067 FDA Center 
for Food Safety and Applied Nutrition CJP19-D445 missing GenomeTrakr 255351 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 1008246 missing missing missing CFSAN091032 SRR288080 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 3835 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 155025677 missing missing missing CFSAN091032 SRR287817 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 1414888 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 64994909 missing missing missing CFSAN091032 SRR000089 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 250945 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 62912540 missing missing missing CFSAN091032 SRR000088 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 242861 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 1051130 missing missing missing CFSAN091032 SRR000087 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 4049 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 525756 missing missing missing CFSAN091032 SRR000086 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 2047 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 436118 missing missing missing CFSAN091032 SRR000085 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 1684 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 466139 missing missing missing CFSAN091032 SRR000084 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 1803 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 1251016 missing missing missing CFSAN091032 SRR000083 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 4841 CDC +PRJNA681235 
SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 1227889 missing missing missing CFSAN091032 SRR000082 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 4753 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 42230342 missing missing missing CFSAN091032 SRR000080 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 158320 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 48201615 missing missing missing CFSAN091032 SRR000081 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 180220 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 35726106 missing missing missing CFSAN091032 SRR000078 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 136244 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 33865731 missing missing missing CFSAN091032 SRR000079 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 128606 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 63395546 missing missing missing CFSAN091032 SRR000068 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 247135 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 57476129 missing missing missing CFSAN091032 SRR000069 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 224837 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 63790620 missing missing missing CFSAN091032 SRR000066 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 242673 CDC +PRJNA681235 SAMN17131268 Campylobacter jejuni 197 Pathogen.env.1.0 66936400 missing missing missing CFSAN091032 SRR000067 FDA Center for Food Safety and Applied Nutrition CJP19-D996 missing GenomeTrakr 255351 CDC
--- a/test-data/test.txt Wed Oct 27 05:00:45 2021 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -SRR11671300 -SRR11671283
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests.py Fri May 03 01:17:43 2024 -0400 @@ -0,0 +1,42 @@ +import pytest + +from bio2srr import * + + +def test_element_tree_xpath(): + from xml.etree import ElementTree as xml + root = xml.fromstring(biosample_example) + assert root.find(".//Id[@db='BioSample']") is not None + +def test_flatten_biosample_xml(): + d = flatten_biosample_xml(biosample_example) + assert d['biosample_accession'] == 'SAMN17131268' + assert d['organism'] == 'Campylobacter jejuni' + assert d['isolate'] == 'CFSAN091032' + +def test_flatten_runs(): + d = list(flatten_runs(runs_example)) + assert len(d) == 2 + +def test_header_sort_override_consistency(): + import random + L = ["C", "B", "A", "taxid", "bioproject"] + L.sort(key=hso) + # assert L[0] == "bioproject" + A = L.copy() + assert A == L + R = [] + for _ in range(100): + random.shuffle(A) + A.sort(key=hso) + R.append(A == L) + assert all(R) + +def test_hso_override(): + assert header_sort_override("bioproject", "taxid") < 0 + assert header_sort_override("taxid", "bioproject") > 0 + assert header_sort_override("taxid", "taxid") == 0 + +def test_hso_regular(): + assert header_sort_override("A", "B") < 0 + assert header_sort_override("B", "A") > 0 \ No newline at end of file