table_ops: table-union.py comparison

planemo upload

comparison

Legend: equal, deleted, inserted, replaced

-:746091a78780
+:1af2524f48b7
 import csv
 import sys
-def main(files):
+def main(unionize=True, *files):
 	header = []
 	items = []
 	possible_identity_headers = None
 	for fi in files:
 		with open(fi, 'rU') as table:
 			reader = csv.DictReader(table, delimiter='\t', dialect='excel-tab')
 			rows = list(reader)
 			for field in reader.fieldnames:
 				if field not in set(header):
 					header.append(field)
 				#try to find identity columns in the files, to use to join
 				if possible_identity_headers is None:
 					possible_identity_headers = set(reader.fieldnames)
 				#winnow down the shared columns in each file by whether they're present in all, and all their values are unique in each file and not null
 				#because these are the most likely to be shared keys
 	# 	#finally
 	# 	possible_identity_headers = set((possible_identity_headers.pop(), ))
 	#if we found an identity column, then try to join rows
-	if possible_identity_headers:
+	if possible_identity_headers and unionize:
 		key_column = possible_identity_headers.pop()
 		keys = set([r[key_column] for r in items])
 		merged_rows = []
 		for key in sorted(keys):
 			new_row = {}
 	wr.writeheader()
 	wr.writerows(items)
 if __name__ == '__main__':
-	main(sys.argv[1:])
+	main(*sys.argv[1:])

Mercurial > repos > jpayne > table_ops