Mercurial > repos > estrain > lissero
annotate variant4b.py @ 0:bc556481a1fb draft default tip
planemo upload commit e12eb56d1744da8f7af8ca1819e2617c83fb17a8
| author | estrain |
|---|---|
| date | Fri, 13 Mar 2026 12:01:17 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
bc556481a1fb
planemo upload commit e12eb56d1744da8f7af8ca1819e2617c83fb17a8
estrain
parents:
diff
changeset
|
1 import sys |
|
bc556481a1fb
planemo upload commit e12eb56d1744da8f7af8ca1819e2617c83fb17a8
estrain
parents:
diff
changeset
|
2 |
|
bc556481a1fb
planemo upload commit e12eb56d1744da8f7af8ca1819e2617c83fb17a8
estrain
parents:
diff
changeset
|
def identify_variants_with_genes(input_file_path, output_file_path, simple_name):
    """Relabel rows whose marker genes are all ``FULL`` as ``4b variant``.

    Reads a tab-separated table with a header row from ``input_file_path``.
    For every data row in which the ``LMO0737``, ``ORF2110`` and ``ORF2819``
    columns all hold ``FULL``, the ``SEROTYPE`` column is overwritten with
    ``"4b variant"``. Each output row is prefixed with ``simple_name`` and the
    header gains a leading ``FileName`` column.

    Args:
        input_file_path: Path of the tab-separated input table.
        output_file_path: Path the rewritten table is written to.
        simple_name: Value placed in the new first column of every data row.

    Returns:
        None. If the input holds no data rows, a message is printed and no
        output file is written.

    Raises:
        ValueError: If the header lacks any of the required columns.
    """
    genes_of_interest = ['LMO0737', 'ORF2110', 'ORF2819']

    with open(input_file_path, 'r') as file:
        lines = file.readlines()

    # A header alone (or an empty file) leaves nothing to annotate.
    if len(lines) <= 1:
        print("Input file does not contain enough data.")
        return

    # Trim only the line terminator: a plain strip() would also eat leading
    # or trailing tabs and silently drop empty first/last columns.
    headers = [name.strip() for name in lines[0].rstrip('\r\n').split('\t')]

    required_columns = genes_of_interest + ['SEROTYPE']
    missing = [name for name in required_columns if name not in headers]
    if missing:
        raise ValueError(
            f"Input file {input_file_path} is missing required column(s): "
            + ", ".join(missing)
        )

    gene_indices = [headers.index(gene) for gene in genes_of_interest]
    serotype_index = headers.index('SEROTYPE')
    # Fewest fields a row needs for every indexed column to exist.
    min_columns = max(gene_indices + [serotype_index]) + 1

    # lines[0] still carries its newline because at least one line follows it.
    modified_lines = [f"FileName\t{lines[0]}"]

    for line in lines[1:]:
        # Blank lines (e.g. a trailing newline at EOF) carry no record.
        if not line.strip():
            continue

        data = line.rstrip('\r\n').split('\t')

        # Rows too short to hold every marker column cannot qualify and are
        # passed through unchanged instead of raising IndexError.
        if len(data) >= min_columns and all(
            data[index].strip() == 'FULL' for index in gene_indices
        ):
            data[serotype_index] = "4b variant"

        modified_lines.append(f"{simple_name}\t" + '\t'.join(data) + '\n')

    with open(output_file_path, 'w') as file:
        file.writelines(modified_lines)

    print(f'Results written to {output_file_path}')
|
bc556481a1fb
planemo upload commit e12eb56d1744da8f7af8ca1819e2617c83fb17a8
estrain
parents:
diff
changeset
|
44 |
|
bc556481a1fb
planemo upload commit e12eb56d1744da8f7af8ca1819e2617c83fb17a8
estrain
parents:
diff
changeset
|
if __name__ == "__main__":
    # Exactly three positional arguments must follow the script name.
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python script.py <input_file_path> <output_file_path> <simple_name>")
        sys.exit(1)

    # Unpack in the order given in the usage line and run the annotation.
    input_file_path, output_file_path, simple_name = cli_args
    identify_variants_with_genes(input_file_path, output_file_path, simple_name)
|
bc556481a1fb
planemo upload commit e12eb56d1744da8f7af8ca1819e2617c83fb17a8
estrain
parents:
diff
changeset
|
54 |
