annotate snp-cache.py @ 13:6adaecff5f2b

planemo upload commit 7f6183b769772449fbcee903686b8d5ec5b7439f-dirty
author jpayne
date Wed, 31 Jan 2018 09:50:37 -0500
parents 66f988a9666f
children 9022b00a9198
rev   line source
jpayne@13 1 #! /usr/bin/env python
jpayne@0 2
jpayne@0 3 import boto3
jpayne@0 4 from botocore.exceptions import ClientError, DataNotFoundError, NoCredentialsError, BotoCoreError
jpayne@0 5
jpayne@0 6 import argparse
jpayne@0 7 import subprocess
jpayne@0 8 import contextlib
jpayne@0 9 import logging
jpayne@0 10 import io
jpayne@0 11 import shutil
jpayne@0 12 import os, sys
jpayne@0 13 from builtins import open as _open
jpayne@0 14 from copy import copy
jpayne@0 15 from functools import partial
jpayne@0 16 from itertools import tee
jpayne@0 17 from io import BytesIO
jpayne@0 18 from threading import Thread
jpayne@0 19
jpayne@0 20 CACHE_NAMESPACE = 'cfsan-galaxytrakr-cache'
jpayne@0 21
class NoCacheNoCommandException(Exception):
    """Raised when an id has no cached result and no command was supplied to generate one."""
jpayne@0 24
@contextlib.contextmanager
def open(filename=None, mode='r'):
    """Yield a binary stream for *filename*, or sys.stdout's byte buffer
    when no filename is given (so stdout is usable where a context
    manager is expected).

    'r' in mode yields a BufferedReader, 'w' a BufferedWriter; any other
    mode (e.g. 'a') yields the raw FileIO.  The stream is flushed on a
    clean exit; only file-backed streams are closed — stdout is left open.
    """
    if not filename:
        # stdout path: flush on clean exit, but never close it.
        yield sys.stdout.buffer
        sys.stdout.buffer.flush()
        return
    # Open the file BEFORE entering the try block.  The original opened it
    # inside the try, so a failed open (missing file, bad permissions) fell
    # through to the finally clause and closed sys.stdout.buffer instead.
    stream = io.FileIO(filename, mode)
    try:
        if 'r' in mode:
            stream = io.BufferedReader(stream)
        elif 'w' in mode:
            stream = io.BufferedWriter(stream)
        yield stream
        stream.flush()
    finally:
        stream.close()
jpayne@0 41
jpayne@0 42 # class stream_over(io.IOBase):
jpayne@0 43 # "a file-like object that works as a tee, for API's that accept a file-like"
jpayne@0 44 # def __init__(self, output_streams, input_stream=None):
jpayne@0 45 # self.streams = output_streams
jpayne@0 46 # self.input = input_stream
jpayne@0 47
jpayne@0 48 # def writable(self, *a, **k):
jpayne@0 49 # return all([s.writeable(*a, **k) for s in self.streams])
jpayne@0 50
jpayne@0 51 # def write(self, *a, **k):
jpayne@0 52 # [s.write(*a, **k) for s in self.streams]
jpayne@0 53
jpayne@0 54 # def writelines(self, *a, **k):
jpayne@0 55 # [s.writelines(*a, **k) for s in self.streams]
jpayne@0 56
jpayne@0 57 # def flush(self, *a, **k):
jpayne@0 58 # [s.flush(*a, **k) for s in self.streams]
jpayne@0 59
jpayne@0 60 # def close(self, *a, **k):
jpayne@0 61 # if self.input:
jpayne@0 62 # self.input.close()
jpayne@0 63 # [s.close(*a, **k) for s in self.streams]
jpayne@0 64
jpayne@0 65 # def read(self, *a, **k):
jpayne@0 66 # if self.input:
jpayne@0 67 # bts = self.input.read(*a, **k)
jpayne@0 68 # self.write(bts)
jpayne@0 69 # return bts
jpayne@0 70 # raise ValueError("Not created with a readable stream; read ops not supported.")
jpayne@0 71
jpayne@0 72 # def readlines(self, *a, **k):
jpayne@0 73 # if self.input:
jpayne@0 74 # return self.input.readlines(*a, **k)
jpayne@0 75 # raise ValueError("Not created with a readable stream; read ops not supported.")
jpayne@0 76
jpayne@0 77 # def seekable(self):
jpayne@0 78 # return False
jpayne@0 79
jpayne@0 80 # @contextlib.contextmanager
jpayne@0 81 # def multiwrite(*streams):
jpayne@0 82 # multistream = stream_over(streams)
jpayne@0 83 # yield multistream
jpayne@0 84 # multistream.flush()
jpayne@0 85 # multistream.close()
jpayne@0 86
def stream_to(input_stream, output_stream):
    """Copy *input_stream* to *output_stream* line by line, logging the
    first 8 lines (truncated to 70 chars) for debugging.

    Iterates the stream lazily rather than calling readlines(), which
    materialized the entire input in memory; output is identical but
    arbitrarily large inputs now stream in constant memory.
    """
    log = logging.getLogger('strm')
    for i, line in enumerate(input_stream):
        if i < 8:
            log.info(str(line[:70]))
        output_stream.write(line)
jpayne@0 92
jpayne@0 93
jpayne@0 94
def main(table, id, command=None, output=None, *a, **k):
    """Fetch the cached result for ``table/id`` from S3 and write it to *output*.

    On a cache miss (or any S3/credentials failure), fall back to running
    *command* in a shell, write its stdout to *output*, and best-effort
    upload the result to the cache bucket.

    Returns 0 on success, or the command's exit code if the fallback
    command failed.  Raises NoCacheNoCommandException when there is neither
    a cached result nor a command to generate one.
    (``id`` shadows the builtin, but the name is part of the CLI interface
    via ``main(**vars(params))`` and must stay.)
    """
    id = id.strip()
    table = table.strip()
    name = f"{table}/{id}"
    cache_log = logging.getLogger('snp-cache.cache')
    # Guard: if boto3.resource() itself raises one of the caught exceptions,
    # `s3` would otherwise be unbound when the fallback path tries to upload.
    s3 = None
    with open(output, 'wb') as output_f:
        # Look up the id in the cache bucket and stream it straight to output.
        try:
            s3 = boto3.resource('s3').Bucket(CACHE_NAMESPACE)
            s3.download_fileobj(name, output_f)
            cache_log.info(f"cache hit on {name}, retrieved.")
        except (DataNotFoundError, NoCredentialsError, BotoCoreError, ClientError) as e:
            # isinstance (not `type(e) is`) so subclasses of
            # DataNotFoundError also count as a plain cache miss.
            if isinstance(e, DataNotFoundError):
                cache_log.info(f"cache miss on {name}")
            else:
                cache_log.error(e)
            # No cached data: the command is the only way to produce it.
            if not command:
                raise NoCacheNoCommandException("No cached result for this id, and no command given to generate.")
            logging.getLogger('snp-cache.cmd').info(command)
            try:
                cached = subprocess.check_output(command, shell=True)
                try:
                    # Best-effort cache write; failures are logged, not fatal.
                    if s3 is not None:
                        s3.upload_fileobj(BytesIO(cached), name)
                except (ClientError, BotoCoreError) as e:
                    cache_log.error('Error writing to cache:')
                    cache_log.error(e)
                finally:
                    # Always deliver the result, even if caching failed.
                    output_f.write(cached)
            except subprocess.CalledProcessError as e:
                print(e.output, file=sys.stderr)
                return e.returncode
    return 0
jpayne@0 132
jpayne@0 133
jpayne@0 134
jpayne@0 135
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="lookup result for file in data table, or compute and install")
    parser.add_argument('table', type=str)
    parser.add_argument('id', type=str)
    parser.add_argument('-c', dest='command')
    parser.add_argument('-o', dest='output')
    # Log file path; defaults to discarding log output entirely.
    parser.add_argument('-l', dest='logging', default='/dev/null')
    params = parser.parse_args()

    logging.basicConfig(filename=params.logging, level=logging.INFO)

    # sys.exit, not quit(): quit is a site-module helper meant for the
    # interactive interpreter and is absent under `python -S`.
    sys.exit(main(**vars(params)))