view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/gff/GbffLocus.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line source
package gff;

import java.util.ArrayList;

import fileIO.ByteStreamWriter;
import shared.Tools;

/**
 * One locus record parsed from a list of text lines laid out as keyword blocks
 * (LOCUS, DEFINITION, ACCESSION, VERSION, ..., FEATURES, ORIGIN), as found in
 * GBFF-style flat files.
 *
 * Layout assumed by this parser (0-based columns, taken from the code below):
 * <ul>
 * <li>A block starts on a line whose first byte is not a space; its text starts at column 12.</li>
 * <li>Lines starting with a space continue the current block.</li>
 * <li>Inside FEATURES, a new feature starts on a line with a non-space byte at column 5;
 * the feature key is the run of non-space bytes starting there, ending by column 21.</li>
 * </ul>
 *
 * Only the accession, organism, species and the features whose type is listed in
 * {@link #featureTypes} are retained; all other blocks are skipped.
 */
public class GbffLocus {

	/**
	 * Parses every block in {@code lines}, starting at line 0.
	 * @param lines Lines of a single locus record; blank (null or empty) lines are tolerated.
	 */
	public GbffLocus(ArrayList<byte[]> lines) {
		while(num<lines.size()){
			parseBlock(lines);
		}
	}

	/**
	 * Dispatches on the keyword of the line at {@link #num} and consumes that block.
	 * Always advances {@link #num} by at least one line so callers looping on it terminate.
	 * @param lines Lines of the locus record; {@code num} must be a valid index.
	 * @return The new value of {@link #num} (index of the next unconsumed line).
	 */
	int parseBlock(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
		if(line==null || line.length==0){
			//Blank lines are skipped everywhere else in this class; do the same here.
			num++;
		}else if(Tools.startsWith(line, " ")){
			//A continuation line should never be seen at block level.
			assert(false) : new String(line);
			num++;
		}else if(Tools.startsWith(line, "LOCUS ")){
			parseLocus(lines);
		}else if(Tools.startsWith(line, "DEFINITION ")){
			parseDefinition(lines);
		}else if(Tools.startsWith(line, "ACCESSION ")){
			parseAccession(lines);
		}else if(Tools.startsWith(line, "VERSION ")){
			parseVersion(lines);
		}else if(Tools.startsWith(line, "DBLINK ")){
			parseDBLink(lines);
		}else if(Tools.startsWith(line, "KEYWORDS ")){
			parseKeywords(lines);
		}else if(Tools.startsWith(line, "SOURCE ")){
			parseSource(lines);
		}else if(Tools.startsWith(line, "REFERENCE ")){
			parseReference(lines);
		}else if(Tools.startsWith(line, "COMMENT ")){
			parseComment(lines);
		}else if(Tools.startsWith(line, "FEATURES ")){
			parseFeatures(lines);
		}else if(Tools.startsWith(line, "CONTIG ")){
			parseContig(lines);
		}else if(Tools.startsWith(line, "ORIGIN ")){
			parseOrigin(lines);
		}else if(Tools.startsWith(line, "PRIMARY ")){
			parsePrimary(lines);
		}else{
			assert(false) : "Unhandled block type: "+new String(line);
			//With assertions disabled, skip the unknown block instead of
			//leaving num unchanged, which would loop forever in the constructor.
			advanceBlock(lines);
		}
		return num;
	}
	
	/**
	 * Moves {@link #num} to the next non-blank line after the current one.
	 * If there is none, {@code num} ends at {@code lines.size()}.
	 * @return That line, or null if no non-blank line remains.
	 */
	private byte[] nextLine(ArrayList<byte[]> lines){
		for(num++; num<lines.size(); num++){
			final byte[] line=lines.get(num);
			if(line!=null && line.length>0){return line;}
		}
		return null;
	}
	
	/** @return The line at {@link #num}, or null if {@code num} is past the last line. */
	private byte[] getLine(ArrayList<byte[]> lines){
		return num>=lines.size() ? null : lines.get(num);
	}
	
	/**
	 * Move pointer to next block start: the next non-blank line
	 * whose first byte is not a space, or the end of the list.
	 * @return The new value of {@link #num}.
	 */
	private int advanceBlock(ArrayList<byte[]> lines){
		for(num++; num<lines.size(); num++){
			byte[] line=lines.get(num);
			if(line!=null && line.length>0 && line[0]!=' '){break;}
		}
		return num;
	}
	
	/**
	 * Move pointer to the next feature or block start, or the end of the list.
	 * @return The new value of {@link #num}.
	 */
	private int advanceFeature(ArrayList<byte[]> lines){
		for(num++; num<lines.size(); num++){
			if(isFeatureBoundary(lines.get(num))){break;}
		}
		return num;
	}
	
	/**
	 * Tests whether a line ends the current feature: it is non-blank and either
	 * starts a new block (non-space at column 0) or a new feature (non-space at
	 * the feature key column). Lines too short to reach the key column are treated
	 * as continuation lines rather than indexed out of bounds.
	 */
	private static boolean isFeatureBoundary(byte[] line){
		if(line==null || line.length==0){return false;}
		if(line[0]!=' '){return true;}
		return line.length>FEATURE_KEY_START && line[FEATURE_KEY_START]!=' ';
	}
	
	/**
	 * Strips the 12-column keyword field from a block header line.
	 * @param line Header line; expected to be at least 12 bytes with a space at column 11.
	 * @return Everything from column 12 onward (may be empty).
	 */
	private String trimBlockName(byte[] line){
		assert(line.length>=12 && line[11]==' ') : new String(line);
		return new String(line, 12, line.length-12);
	}
	
	/**
	 * Extracts the feature key from the first line of a feature.
	 * @param line Feature line; must be longer than the feature key column.
	 * @return The run of non-space bytes starting at column 5, cut off at column 21
	 * or at the end of the line, whichever comes first.
	 */
	private String toFeatureType(byte[] line){
		assert(line[4]==' ') : new String(line);
		assert(line[5]!=' ') : new String(line);
		assert(line.length<=FEATURE_KEY_END-1 || line[FEATURE_KEY_END-1]==' ') : new String(line);
		//Bound the scan by the line length so short lines cannot be indexed out of range.
		final int lim=Math.min(FEATURE_KEY_END, line.length);
		int stop=FEATURE_KEY_START+1;
		while(stop<lim && line[stop]!=' '){stop++;}
		return new String(line, FEATURE_KEY_START, stop-FEATURE_KEY_START);
	}
	
	/** Takes the first whitespace-delimited token of the LOCUS line as the accession, unless one is already set. */
	private int parseLocus(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
//		assert(Tools.startsWith(line, "LOCUS")) : new String(line);
		if(accession==null){
			String s=trimBlockName(line);
			String[] split=Tools.whitespacePlus.split(s);
			accession=split.length>0 ? split[0] : null;
		}
		return advanceBlock(lines);
	}
	
	/** Takes the text before the first comma of the DEFINITION line as the organism, unless one is already set. */
	private int parseDefinition(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
		if(organism==null){
			String s=trimBlockName(line);
			String[] split=Tools.commaPattern.split(s);
			organism=split.length>0 ? split[0] : null;
		}
		return advanceBlock(lines);
	}
	
	/** Takes the first token of the ACCESSION line as the accession, unless one is already set. */
	private int parseAccession(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
		if(accession==null){
			String s=trimBlockName(line);
			String[] split=Tools.whitespacePlus.split(s);
			accession=split.length>0 ? split[0] : null;
		}
		return advanceBlock(lines);
	}
	
	/**
	 * Takes the first token of the VERSION line.  It overrides any accession already
	 * set when it is longer than one character; otherwise it is used only if no
	 * accession has been set yet.
	 */
	private int parseVersion(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
		String s=trimBlockName(line);
		String[] split=Tools.whitespacePlus.split(s);
		s=split.length>0 ? split[0] : null;
		if(accession==null || (s!=null && s.length()>1)){
			accession=s;
		}
		return advanceBlock(lines);
	}
	
	/** Skips the DBLINK block. */
	private int parseDBLink(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}
	
	/** Skips the KEYWORDS block. */
	private int parseKeywords(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}
	
	/** Stores the text of the SOURCE line as the species, unless one is already set. */
	private int parseSource(ArrayList<byte[]> lines){
		byte[] line=lines.get(num);
		if(species==null){
			species=trimBlockName(line);
		}
		return advanceBlock(lines);
	}
	
	/** Skips the REFERENCE block. */
	private int parseReference(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}
	
	/** Skips the COMMENT block. */
	private int parseComment(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}
	
	/**
	 * Consumes the FEATURES block.  Each feature whose key is found in
	 * {@link #featureTypes} is parsed and stored; others are skipped.
	 * On return, {@link #num} is at the next block start or past the last line.
	 * @return The new value of {@link #num}.
	 */
	private int parseFeatures(ArrayList<byte[]> lines){
		for(byte[] line=nextLine(lines); line!=null && line.length>0 && line[0]==' '; line=getLine(lines)){
//			System.err.println(num+": "+new String(line));
			if(line.length<=FEATURE_KEY_START){
				//Too short to hold a feature key (e.g. a whitespace-only line); skip it.
				advanceFeature(lines);
				continue;
			}
			String type=toFeatureType(line);
			int idx=Tools.find(type, featureTypes);
//			System.err.println("idx="+idx+" for '"+type+"'");
			if(idx>=0){
//				System.err.println("parseFeature");
				parseFeature(lines, type);
//				System.err.println(features.get(features.size()-1));
			}else{
//				System.err.println("advanceFeature");
				advanceFeature(lines);
			}
		}
		return num;
	}
	
	/**
	 * Collects the line at {@link #num} plus its continuation lines, builds a
	 * {@link GbffFeature} from them, and stores it unless it reports an error.
	 * Moves the pointer to the next feature or block start.
	 * @param type Feature key of the line at {@code num}.
	 * @return The new value of {@link #num}.
	 */
	private int parseFeature(ArrayList<byte[]> lines, String type){
		ArrayList<byte[]> flist=new ArrayList<byte[]>();
		flist.add(lines.get(num));
		for(num++; num<lines.size(); num++){
			byte[] line=lines.get(num);
			if(isFeatureBoundary(line)){
//				assert(false) : Character.toString(line[0])+", "+Character.toString(line[5])+", "+Character.toString(line[6])+"\n"+new String(line);
				break;
			}
			//Note: blank lines inside a feature are passed through to GbffFeature unchanged.
			flist.add(line);
		}
		GbffFeature f=new GbffFeature(flist, type, accession);
		if(!f.error){
			features.add(f);
		}else{
//			System.err.println("Failed to parse feature "+f);
		}
		return num;
	}
	
	/** Skips the CONTIG block. */
	private int parseContig(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}
	
	/** Skips the ORIGIN block. */
	private int parseOrigin(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}
	
	/** Skips the PRIMARY block. */
	private int parsePrimary(ArrayList<byte[]> lines){
		return advanceBlock(lines);
	}
	
	/**
	 * Writes a "##sequence-region" header for this locus, followed by every stored
	 * CDS, tRNA and rRNA feature that is neither pseudo nor flagged as an error.
	 * The start and stop coordinates are appended to the header only when both are positive.
	 * @param bsw Destination writer.
	 * @throws NullPointerException if no accession was parsed for this locus.
	 */
	public void toGff(ByteStreamWriter bsw) {
		final byte[] accessionB=accession.getBytes();
		bsw.print(seqRegB);
		bsw.print(accessionB);
		if(start>0 && stop>0){
			bsw.print(' ').print(start).print(' ').print(stop);
		}
		bsw.println();
		for(GbffFeature f : features){
			if(f.type==GbffFeature.CDS || f.type==GbffFeature.tRNA || f.type==GbffFeature.rRNA){
				if(!f.pseudo && !f.error){
					f.toGff(bsw);
				}
			}
		}
	}
	
	
	/** Line number: index of the next unconsumed line during parsing */
	int num=0;
	
	boolean printGene=false;
	boolean printRepeat=false; 
	
	/** Feature keys that are parsed and retained; all others are skipped */
	public static String[] featureTypes=GbffFeature.typeStrings;
	private static final byte[] seqRegB="##sequence-region ".getBytes();
	
	/** 0-based column at which a feature key starts */
	private static final int FEATURE_KEY_START=5;
	/** 0-based column (exclusive) by which a feature key must have ended */
	private static final int FEATURE_KEY_END=21;
	
	String accession;
	String organism;
	String species;
	/** Never assigned in this class; printed by toGff only when positive */
	int start;
	/** Never assigned in this class; printed by toGff only when positive */
	int stop;
	ArrayList<GbffFeature> features=new ArrayList<GbffFeature>();
}