Mercurial > repos > galaxytrakr > plasmidtrakr
view plasmidtrakr.xml @ 17:36d49a2e6bbe draft default tip
planemo upload commit cf01f51a17a43906355f9545ece73a4e671e258b
| author | galaxytrakr |
|---|---|
| date | Thu, 30 Apr 2026 19:52:07 +0000 |
| parents | 706b2bbc64ed |
| children |
line wrap: on
line source
<tool id="plasmidtrakr" name="PlasmidTrakr" version="0.2.3">
    <!--
        Galaxy wrapper that chains two steps in a single job:
          1. `mash screen` of the input against a sketch database chosen from the
             `mash_sketches` data table;
          2. `predict_source.py` (shipped next to this file) run on the Mash hits with a
             model chosen from the `plasmidtrakr_models` data table.
        When either step yields no usable rows, a one-row "No Prediction" table is written
        so the output dataset always has the same three columns.
    -->
    <description>Screens assemblies against a Mash database and predicts isolate source using a trained machine learning model</description>
    <requirements>
        <requirement type="package" version="2.3">mash</requirement>
        <requirement type="package" version="2.3.3">pandas</requirement>
        <requirement type="package" version="1.6.1">scikit-learn</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## 1. Setup names and database
        ## The dataset name is user-controlled and ends up inside quoted shell words,
        ## so every character outside [A-Za-z0-9_-] is replaced with an underscore
        ## (spaces and dots still become underscores, as before).
        #import re
        #set $input_name = re.sub('[^A-Za-z0-9_-]', '_', str($assembly_input.element_identifier))
        ln -s '$mash_database.fields.path' queries.msh &&

        ## 2. Run Mash Screen
        ## -w: winner-takes-all, -i: minimum identity to report,
        ## -p: use the cores Galaxy allocated to this job (1 when unset).
        mash screen -w -p \${GALAXY_SLOTS:-1} -i $threshold queries.msh '$assembly_input' > '${input_name}_mash.tabular' &&

        ## 3. Conditional Logic: only run the model when the Mash table has at least 2 lines
        if [ "\$(wc -l < '${input_name}_mash.tabular')" -ge 2 ]; then
            python '$__tool_directory__/predict_source.py'
                -i '${input_name}_mash.tabular'
                -b '$model_selection.fields.path'
                -t $threshold
                -o '$prediction_output';

            ## If the script ran but produced no results (e.g. everything was filtered out),
            ## still provide the "No Prediction" fallback.
            ## NOTE(review): a non-zero exit from predict_source.py is also followed by this
            ## check, so a crash that leaves the output empty is reported as "No Prediction".
            ## Confirm whether that is intended before chaining the two with '&&'.
            if [ ! -s '$prediction_output' ] || [ "\$(wc -l < '$prediction_output')" -le 1 ]; then
                printf 'Run\tPredicted_Source\tConfidence_Score\n%s\tNo Prediction\t0.0\n' '${input_name}' > '$prediction_output';
            fi;
        else
            ## No Mash hits: skip Python and create the fallback file immediately
            printf 'Run\tPredicted_Source\tConfidence_Score\n%s\tNo Prediction\t0.0\n' '${input_name}' > '$prediction_output';
        fi
    ]]></command>
    <inputs>
        <param name="assembly_input" type="data" format="fasta,fasta.gz,fastq,fastq.gz" label="Genome Assembly / Reads" help="The FASTA/FASTQ file containing the isolate sequence."/>
        <param name="mash_database" type="select" label="Select Mash Database" help="Choose the pre-computed Mash sketch database to screen against.">
            <options from_data_table="mash_sketches">
                <validator type="no_options" message="No Mash databases are configured. Please contact your Galaxy administrator."/>
            </options>
        </param>
        <param name="model_selection" type="select" label="Select Prediction Model" help="Choose which trained model to use for prediction.">
            <options from_data_table="plasmidtrakr_models">
                <validator type="no_options" message="No prediction models are configured. Please contact your Galaxy administrator."/>
            </options>
        </param>
        <!-- The same value is passed to both `mash screen -i` and `predict_source.py -t`. -->
        <param name="threshold" type="float" value="0.95" min="0.0" max="1.0" label="Mash Identity Threshold" help="Filter plasmid hits below this identity. Must match the threshold used for model training."/>
    </inputs>
    <outputs>
        <data name="prediction_output" format="tabular" label="Prediction for ${on_string}"/>
    </outputs>
    <help><![CDATA[
**What it does**

This tool performs a complete workflow in a single step: it screens a genome assembly or
read set against a built-in plasmid database using **mash screen**, and then feeds those
plasmid hits into a pre-trained **machine learning model** to predict the original source
of the isolate.

**Workflow**

1. Provide your **genome assembly (FASTA)** or raw reads (FASTQ).
2. Select your **Mash database** from the server's configured list.
3. Select the desired prediction model.
4. Set the **Mash identity threshold**; it must match the threshold used to train the selected model.
5. Execute to screen and predict in one step.

**Output**

A tabular file containing the isolate ID, the predicted source, and a confidence score.

If Mash reports no hits, or the model returns no prediction, the file contains a single
row with ``No Prediction`` as the predicted source and ``0.0`` as the confidence score.
    ]]></help>
    <citations>
        <citation type="bibtex">
@misc{strain_2026_plasmidtrakr,
  author = {Strain, Errol},
  title = {PlasmidTrakr: A tool for predicting isolate source from plasmid profiles},
  year = {2026},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/estrain/plasmidtrakr}}
}
        </citation>
    </citations>
</tool>
