16S rRNA Microbiome Analysis - Part 2

Author

Eleana Cabello

Published

June 9, 2025

Overview

Pipeline Outline

6-Classifying ASVs using a Feature Classifier

# Train a Naive Bayes feature classifier from the Greengenes reference:
# import the reference sequences and taxonomy, extract reads with the
# primer pair below, then fit the classifier on the extracted reads.

# Greengenes release and the reference sequence / taxonomy files it ships.
gg_version="gg_13_8_otus"
gg_ref_fasta="gg_ref/$gg_version/rep_set/99_otus.fasta"
gg_ref_tax="gg_ref/$gg_version/taxonomy/99_otu_taxonomy.txt"

# Primer pair handed to extract-reads, and the label used in output names.
primer_f="CCTACGGGAGGCAGCAG"
primer_r="GGACTACHVGGGTWTCTAAT"
region="v3_v4"

classifier_name="classifier-${region}-${gg_version}"
classifier_dir="output/5_classifier"

# Every artifact path is defined exactly once so that the step writing a
# file and the step reading it can never disagree on its name.
ref_seqs_qza="$classifier_dir/${gg_version}-seqs.qza"
ref_tax_qza="$classifier_dir/${gg_version}-taxonomy.qza"
region_reads_qza="$classifier_dir/ref-seqs-${region}.qza"
classifier_qza="$classifier_dir/$classifier_name.qza"

# -p creates missing parents and does not fail when the snippet is re-run.
mkdir -p "$classifier_dir"

## Classifier Training
qiime tools import \
    --type 'FeatureData[Sequence]' \
    --input-path "$gg_ref_fasta" \
    --output-path "$ref_seqs_qza"

# The taxonomy file is imported with the headerless TSV format.
qiime tools import \
    --type 'FeatureData[Taxonomy]' \
    --input-format HeaderlessTSVTaxonomyFormat \
    --input-path "$gg_ref_tax" \
    --output-path "$ref_tax_qza"

qiime feature-classifier extract-reads \
    --i-sequences "$ref_seqs_qza" \
    --p-f-primer "$primer_f" \
    --p-r-primer "$primer_r" \
    --verbose \
    --o-reads "$region_reads_qza"

# Reads the taxonomy artifact written by the second import above.
qiime feature-classifier fit-classifier-naive-bayes \
    --i-reference-reads "$region_reads_qza" \
    --i-reference-taxonomy "$ref_tax_qza" \
    --verbose \
    --o-classifier "$classifier_qza"
# Classify the representative sequences with the trained classifier, then
# tabulate the taxonomy and draw a taxa bar plot.

metadata_file="output/0_metadata_files/metadata-copy.tsv"
# Must be the artifact written by fit-classifier-naive-bayes, i.e.
# output/5_classifier/classifier-<region>-<gg_version>.qza
classifier="output/5_classifier/classifier-v3_v4-gg_13_8_otus.qza"

tax_class_dir="output/6_taxa_classification"
# -p creates missing parents and does not fail when the snippet is re-run.
mkdir -p "$tax_class_dir"

# rep_seqs and rep_seqs_table are not assigned anywhere in this part;
# presumably an earlier pipeline step sets them -- TODO confirm. Stop with
# a clear message instead of passing an empty path to qiime.
: "${rep_seqs:?rep_seqs must point to the .qza passed to classify-sklearn --i-reads}"
: "${rep_seqs_table:?rep_seqs_table must point to the .qza passed to taxa barplot --i-table}"

taxonomy_qza="$tax_class_dir/taxonomy.qza"

qiime feature-classifier classify-sklearn \
    --i-classifier "$classifier" \
    --i-reads "$rep_seqs" \
    --o-classification "$taxonomy_qza"

qiime metadata tabulate \
    --m-input-file "$taxonomy_qza" \
    --o-visualization "$tax_class_dir/taxonomy.qzv"

qiime taxa barplot \
    --i-table "$rep_seqs_table" \
    --i-taxonomy "$taxonomy_qza" \
    --m-metadata-file "$metadata_file" \
    --o-visualization "$tax_class_dir/taxa-barplot.qzv"

Differential Abundance using ANCOM-BC

# Run ANCOM-BC on the feature table with `subject` as the formula; the
# differentials artifact it writes is the input of the plot below.
qiime composition ancombc \
  --p-formula 'subject' \
  --m-metadata-file 'sample-metadata.tsv' \
  --i-table 'gut-table.qza' \
  --o-differentials 'ancombc-subject.qza'

# Bar plot of the differentials, using a significance threshold of 0.001.
qiime composition da-barplot \
  --p-significance-threshold 0.001 \
  --i-data 'ancombc-subject.qza' \
  --o-visualization 'da-barplot-subject.qzv'

FULL SCRIPT

################################################
# STEP: Train the feature classifier
################################################
# Import the Greengenes reference sequences and taxonomy, extract reads
# with the primer pair below, then fit a Naive Bayes classifier on them.

# Greengenes release and the reference sequence / taxonomy files it ships.
gg_version="gg_13_8_otus"
gg_ref_fasta="gg_ref/$gg_version/rep_set/99_otus.fasta"
gg_ref_tax="gg_ref/$gg_version/taxonomy/99_otu_taxonomy.txt"

# Primer pair handed to extract-reads, and the label used in output names.
primer_f="CCTACGGGAGGCAGCAG"
primer_r="GGACTACHVGGGTWTCTAAT"
region="v3_v4"

classifier_name="classifier-${region}-${gg_version}"
classifier_dir="output/5_classifier"

# Every artifact path is defined exactly once so that the step writing a
# file and the step reading it can never disagree on its name.
ref_seqs_qza="$classifier_dir/${gg_version}-seqs.qza"
ref_tax_qza="$classifier_dir/${gg_version}-taxonomy.qza"
region_reads_qza="$classifier_dir/ref-seqs-${region}.qza"
classifier_qza="$classifier_dir/$classifier_name.qza"

# -p creates missing parents and does not fail when the script is re-run.
mkdir -p "$classifier_dir"

## Classifier Training
qiime tools import \
    --type 'FeatureData[Sequence]' \
    --input-path "$gg_ref_fasta" \
    --output-path "$ref_seqs_qza"

# The taxonomy file is imported with the headerless TSV format.
qiime tools import \
    --type 'FeatureData[Taxonomy]' \
    --input-format HeaderlessTSVTaxonomyFormat \
    --input-path "$gg_ref_tax" \
    --output-path "$ref_tax_qza"

qiime feature-classifier extract-reads \
    --i-sequences "$ref_seqs_qza" \
    --p-f-primer "$primer_f" \
    --p-r-primer "$primer_r" \
    --verbose \
    --o-reads "$region_reads_qza"

# Reads the taxonomy artifact written by the second import above.
qiime feature-classifier fit-classifier-naive-bayes \
    --i-reference-reads "$region_reads_qza" \
    --i-reference-taxonomy "$ref_tax_qza" \
    --verbose \
    --o-classifier "$classifier_qza"
    
################################################
# STEP: Taxonomic classification
################################################
# Classify the representative sequences with the trained classifier, then
# tabulate the taxonomy and draw a taxa bar plot.

metadata_file="output/0_metadata_files/metadata-copy.tsv"
# Must be the artifact written by fit-classifier-naive-bayes, i.e.
# output/5_classifier/classifier-<region>-<gg_version>.qza
classifier="output/5_classifier/classifier-v3_v4-gg_13_8_otus.qza"

tax_class_dir="output/6_taxa_classification"
# -p creates missing parents and does not fail when the script is re-run.
mkdir -p "$tax_class_dir"

# rep_seqs and rep_seqs_table are not assigned anywhere in this script;
# presumably an earlier pipeline step sets them -- TODO confirm. Stop with
# a clear message instead of passing an empty path to qiime.
: "${rep_seqs:?rep_seqs must point to the .qza passed to classify-sklearn --i-reads}"
: "${rep_seqs_table:?rep_seqs_table must point to the .qza passed to taxa barplot --i-table}"

taxonomy_qza="$tax_class_dir/taxonomy.qza"

qiime feature-classifier classify-sklearn \
    --i-classifier "$classifier" \
    --i-reads "$rep_seqs" \
    --o-classification "$taxonomy_qza"

qiime metadata tabulate \
    --m-input-file "$taxonomy_qza" \
    --o-visualization "$tax_class_dir/taxonomy.qzv"

qiime taxa barplot \
    --i-table "$rep_seqs_table" \
    --i-taxonomy "$taxonomy_qza" \
    --m-metadata-file "$metadata_file" \
    --o-visualization "$tax_class_dir/taxa-barplot.qzv"

################################################
# STEP: Differential abundance with ANCOM-BC
################################################

# Run ANCOM-BC on the feature table with `subject` as the formula; the
# differentials artifact it writes is the input of the plot below.
qiime composition ancombc \
  --i-table "gut-table.qza" \
  --m-metadata-file "sample-metadata.tsv" \
  --p-formula "subject" \
  --o-differentials "ancombc-subject.qza"

# Bar plot of the differentials, using a significance threshold of 0.001.
qiime composition da-barplot \
  --i-data "ancombc-subject.qza" \
  --p-significance-threshold 0.001 \
  --o-visualization "da-barplot-subject.qzv"