diff bio2srr.py @ 0:bbf3b6e6a026 tip

planemo upload commit b'4aa8338dc8bcd7f6c0fb675044ea9d0c045ee7f3\n'
author jpayne
date Tue, 05 Dec 2017 11:42:47 -0500 (2017-12-05)
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bio2srr.py	Tue Dec 05 11:42:47 2017 -0500
@@ -0,0 +1,58 @@
+#! /usr/bin/env python3
+
+"Grab SRR numbers from BioProjects via the EMBL-ENA REST APIs."
+
+import requests
+import sys
+
+sra_exp_query = "http://www.ebi.ac.uk/ebisearch/ws/rest/sra-experiment?query={bioproject}"  # URL template; {bioproject} is filled in with str.format
+
+sample = """{
+    "hitCount": 2,
+    "entries": [
+        {
+            "id": "SRX377510",
+            "source": "sra-experiment"
+        },
+        {
+            "id": "SRX583279",
+            "source": "sra-experiment"
+        }
+    ],
+    "facets": []
+}"""  # presumably an example sra-experiment response (TODO confirm); this binding is replaced by the second `sample` below
+
+sra_run_query = "http://www.ebi.ac.uk/ebisearch/ws/rest/sra-run?query={experiment}"  # URL template; {experiment} is filled in with str.format
+
+sample = """{
+    "hitCount": 1,
+    "entries": [
+        {
+            "id": "SRR1029665",
+            "source": "sra-run"
+        }
+    ],
+    "facets": []
+}"""  # presumably an example sra-run response (TODO confirm); `sample` is not read anywhere in the lines shown
+
+if __name__ == "__main__":
+	try:
+		bioproject = sys.argv[1]
+		b_result = requests.get(sra_exp_query.format(bioproject=bioproject), headers=dict(Accept="application/json"))
+		b_result.raise_for_status()
+		if b_result.json()['entries']:
+			for experiment in [d['id'] for d in b_result.json()['entries']]:
+				r_result = requests.get(sra_run_query.format(experiment=experiment), headers=dict(Accept="application/json"))
+				r_result.raise_for_status()
+				for run in [d['id'] for d in r_result.json()['entries']]:
+					print(run)
+		else:
+			print(f"No results found for '{bioproject}'.", file=sys.stderr)
+			quit(1)
+	except IndexError:
+		raise ValueError("Please provide an NCBI BioProject, NCBI BioSample, EMBL Project, or EMBL Study accession.")
+	except KeyError as e:
+		raise ValueError() from e
+
+
+