<!-- Mercurial > repos > galaxytrakr > plasmidtrakr -->

<tool id="plasmidtrakr" name="PlasmidTrakr" version="0.2.2">
    <!-- One-line summary of the tool: a Mash screen followed by model-based source prediction. -->
    <description>Screens assemblies against a Mash database and predicts isolate source using a trained machine learning model</description>

    <!-- Pinned packages the command needs: mash for the screen, and pandas plus
         scikit-learn for predict_source.py (presumably to load and run the
         trained model; confirm against the script). -->
    <requirements>
        <requirement version="2.3" type="package">mash</requirement>
        <requirement version="2.3.3" type="package">pandas</requirement>
        <requirement version="1.6.1" type="package">scikit-learn</requirement>
    </requirements>

    <command detect_errors="exit_code"><![CDATA[
        ## Purpose: screen the input against the selected Mash sketch database,
        ## then hand the hits to predict_source.py. When there is nothing to
        ## predict from, write a one-row "No Prediction" table instead, so the
        ## output always has the same three-column layout.
        #import re

        ## 1. Setup names and database
        ## The element identifier is user-controlled and ends up inside a shell
        ## command and a tab-separated file, so every character outside a
        ## conservative safe set (not only spaces) is replaced with "_".
        #set $input_name = re.sub('[^A-Za-z0-9_.-]', '_', str($assembly_input.element_identifier))
        ln -s '$mash_database.fields.path' queries.msh &&

        ## 2. Run Mash Screen
        ## -w: winner-takes-all handling of redundant hits; -i: minimum identity to report.
        mash screen
            -w
            -i $threshold
            queries.msh
            '$assembly_input'
            > mash_results.tabular
        &&

        ## 3. Conditional Logic: Check if Mash produced hits
        ## [ -s file ] checks if file exists and has size > 0
        if [ -s mash_results.tabular ]; then
            ## Run the prediction script only if there is data.
            ## The script path is quoted so an install directory containing spaces
            ## still works, and the call is chained with "&&" so that a failing
            ## script fails the job instead of being reported as "No Prediction".
            python '$__tool_directory__/predict_source.py'
                -i mash_results.tabular
                -b '$model_selection.fields.path'
                -t $threshold
                -o '$prediction_output'
            &&

            ## If the script ran but wrote at most a header line (e.g. every hit
            ## was filtered out), still provide the "No Prediction" fallback.
            ## printf with a %s argument keeps the name out of the format string,
            ## so it is never re-interpreted by the shell or as escape sequences.
            if [ \$(wc -l < '$prediction_output') -le 1 ]; then
                printf 'Run\tPredicted_Source\tConfidence_Score\n%s\tNo Prediction\t0.0\n' '$input_name' > '$prediction_output';
            fi;
        else
            ## Skip Python and create the fallback file immediately
            printf 'Run\tPredicted_Source\tConfidence_Score\n%s\tNo Prediction\t0.0\n' '$input_name' > '$prediction_output';
        fi
    ]]></command>

    <inputs>
        <!-- Sequence data to screen; passed to mash screen as the input file. -->
        <param
            name="assembly_input"
            type="data"
            format="fasta,fasta.gz,fastq,fastq.gz"
            label="Genome Assembly / Reads"
            help="The FASTA/FASTQ file containing the isolate sequence."/>

        <!-- Choices come from the mash_sketches data table; the command reads its "path" field. -->
        <param
            name="mash_database"
            type="select"
            label="Select Mash Database"
            help="Choose the pre-computed Mash sketch database to screen against.">
            <options from_data_table="mash_sketches">
                <validator
                    type="no_options"
                    message="No Mash databases are configured. Please contact your Galaxy administrator."/>
            </options>
        </param>

        <!-- Choices come from the plasmidtrakr_models data table; the command reads its "path" field. -->
        <param
            name="model_selection"
            type="select"
            label="Select Prediction Model"
            help="Choose which trained model to use for prediction.">
            <options from_data_table="plasmidtrakr_models">
                <validator
                    type="no_options"
                    message="No prediction models are configured. Please contact your Galaxy administrator."/>
            </options>
        </param>

        <!-- The same value is given to mash screen (-i) and to predict_source.py (-t). -->
        <param
            name="threshold"
            type="float"
            value="0.95"
            min="0.0"
            max="1.0"
            label="Mash Identity Threshold"
            help="Filter plasmid hits below this identity. Must match the threshold used for model training."/>
    </inputs>

    <outputs>
        <!-- The only result: the prediction table the command writes, either
             via predict_source.py or as the "No Prediction" fallback. -->
        <data
            name="prediction_output"
            format="tabular"
            label="Prediction for ${on_string}"/>
    </outputs>

    <help><![CDATA[
**What it does**

This tool performs a complete workflow in a single step: it screens a genome assembly or read set against a built-in plasmid database using **mash screen**, and then feeds those plasmid hits into a pre-trained **machine learning model** to predict the original source of the isolate.

**Workflow**

1. Provide your **genome assembly (FASTA)** or raw reads (FASTQ).
2. Select your **Mash database** from the server's configured list.
3. Select the desired **prediction model**.
4. Execute to screen and predict in one step.

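**Output**

A tabular file with three columns: ``Run`` (the isolate ID), ``Predicted_Source`` (the predicted source), and ``Confidence_Score`` (a confidence score). If Mash reports no plasmid hits, or the model produces no prediction, the file contains a single row with ``No Prediction`` and a confidence score of ``0.0``.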
    ]]></help>

    <citations>
        <!-- Single BibTeX entry pointing at the PlasmidTrakr GitHub repository. -->
        <citation type="bibtex">
            @misc{strain_2026_plasmidtrakr,
                author = {Strain, Errol},
                title = {PlasmidTrakr: A tool for predicting isolate source from plasmid profiles},
                year = {2026},
                publisher = {GitHub},
                journal = {GitHub repository},
                howpublished = {\url{https://github.com/estrain/plasmidtrakr}}
            }
        </citation>
    </citations>
</tool>
<!--
author	galaxytrakr
date	Thu, 30 Apr 2026 19:40:55 +0000
parents	58006290e654
children	36d49a2e6bbe
-->