# load the packages
library('phyloseq')
library('tibble')
library('ggplot2')
library('dplyr')
library('tidyr')
library('ape')
library('vegan')
library('stringr')

Warning message:
“replacing previous import ‘vctrs::data_frame’ by ‘tibble::data_frame’ when loading ‘dplyr’”

Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: permute

Loading required package: lattice

This is vegan 2.5-6


# Work from the plots directory; the relative paths used for reading inputs
# and writing outputs below are resolved from here.
setwd("../plots")


# Read the OTU frequency table. The first column supplies the row names, and
# comment.char is switched off so that no line is skipped as a comment.
import_table <- read.table(
  "../otus/otu_frequency_table.tsv",
  header = TRUE,
  sep = "\t",
  row.names = 1,
  comment.char = ""
)


head(import_table)


# Phyloseq is given a matrix rather than a data frame
otumat <- as.matrix(import_table)
head(otumat)


# Wrap the matrix with `otu_table`, declaring that taxa are on the rows
OTU <- otu_table(otumat, taxa_are_rows = TRUE)
head(OTU)


# Read the taxonomy table: one row per feature, with columns Feature.ID,
# Taxon (a single ';'-separated lineage string) and Confidence.
import_taxa <- read.table(
  "../taxonomy/otu_taxonomy.tsv",
  header = TRUE,
  sep = "\t"
)
head(import_taxa)


# Names for the columns that the Taxon string is split into
ranks <- c("kingdom", "phylum", "class", "order", "family", "genus", "species")


# Build the taxonomy matrix:
#   1. strip the rank prefixes (a lowercase letter followed by two
#      underscores, e.g. "d__", "p__") from the lineage string;
#   2. split the lineage on ';' into one column per rank. Lineages with fewer
#      than seven pieces are padded with NA, and tidyr warns about those rows;
#   3. move Feature.ID into the row names and convert to a matrix.
# A plain mutate() is used instead of the superseded mutate_at().
# NOTE(review): the Confidence column is still present when as.matrix() is
# called, so it is carried into the matrix alongside the seven ranks
# (tax_table() then reports 8 taxonomic ranks). Confirm whether it should be
# dropped before building the matrix.
taxonomy <- import_taxa %>%
  mutate(Taxon = str_replace_all(Taxon, "[a-z]__", "")) %>%
  separate(Taxon, into = ranks, sep = ";", remove = TRUE) %>%
  column_to_rownames(var = "Feature.ID") %>%
  as.matrix()
head(taxonomy)

Warning message:
“Expected 7 pieces. Missing pieces filled with `NA` in 13 rows [9, 13, 15, 17, 25, 26, 27, 28, 29, 30, 31, 32, 33].”


# Turn the taxonomy matrix into a phyloseq taxonomy table
TAX <- tax_table(taxonomy)


# Read the sample metadata; the first column holds the sample IDs and becomes
# the row names.
metadata <- read.table(
  "../docs/sample_metadata.tsv",
  header = TRUE,
  sep = "\t",
  row.names = 1
)
metadata


# The negative control is not used, so remove it. It is selected by its
# `location` label instead of by row/column position, so the right sample is
# dropped even if the metadata file is reordered or gains rows. `%in%` never
# yields NA, so rows with a missing location are kept unchanged.
metadata <- metadata[!(metadata$location %in% "negative"), , drop = FALSE]
tail(metadata)


# Create a Phyloseq sample_data-class
META <- sample_data(metadata)


# Read the rooted OTU tree from its Newick file and print a summary
otu_tree <- read.tree(file = "../otus/otu_rooted_tree.nwk")
otu_tree

Phylogenetic tree with 33 tips and 32 internal nodes.

Tip labels:
	OTU.33, OTU.8, OTU.21, OTU.29, OTU.17, OTU.16, ...
Node labels:
	root, , 0.870, 0.647, 0.637, 0.965, ...

Rooted; includes branch lengths.


## Quick visual check of the tree
plot(otu_tree)


# Combine the OTU table, taxonomy, sample data and tree into one phyloseq
# object, then print its summary
physeq <- phyloseq(
  OTU,
  TAX,
  META,
  otu_tree
)
physeq

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 33 taxa and 11 samples ]
sample_data() Sample Data:       [ 11 samples by 8 sample variables ]
tax_table()   Taxonomy Table:    [ 33 taxa by 8 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 33 tips and 32 internal nodes ]


# Rarefaction curves; the OTU table is transposed before it is handed to
# rarecurve
rarecurve(
  t(otu_table(physeq)),
  cex = 1,
  step = 50
)


# Bar plot of abundance per sample
plot_bar(physeq)


# Smallest and largest per-sample totals
physeq %>% sample_sums() %>% min() %>% print()
physeq %>% sample_sums() %>% max() %>% print()

[1] 8117
[1] 20184


# Rarefy every sample to the same depth: 90% of the smallest per-sample total.
# That product is generally fractional (0.9 * 8117 = 7305.3 for this data), so
# floor() is applied to request an explicit whole number of reads.
# Sampling is done without replacement, and rngseed fixes the random seed so
# the subsampling is repeatable.
physeq.rarefied <- rarefy_even_depth(
  physeq,
  sample.size = floor(0.9 * min(sample_sums(physeq))),
  rngseed = 1,
  replace = FALSE
)

`set.seed(1)` was used to initialize repeatable random subsampling.

Please record this for your records so others can reproduce.

Try `set.seed(1); .Random.seed` for the full vector

...


# Same bar plot, this time for the rarefied object
plot_bar(physeq.rarefied)


# Write the unrarefied phyloseq object to disk
saveRDS(object = physeq, file = "fish_phyloseq.rds")


# ...and the rarefied one next to it
saveRDS(object = physeq.rarefied, file = "fish_phyloseq_rarefied.rds")


# Send the rarefaction curves to a PDF: open the device, draw, close
pdf("species_richness_plot.pdf")
rarecurve(
  t(otu_table(physeq)),
  step = 50,
  col = "blue",
  lty = 2,
  cex = 1.5
)
dev.off()


# The same figure as an 800 x 800 JPEG; other graphics devices work the same way
jpeg("species_richness_plot.jpg", height = 800, width = 800)
rarecurve(
  t(otu_table(physeq)),
  step = 50,
  col = "blue",
  lty = 2,
  cex = 1.5
)
dev.off()

	AM1	AM2	AM3	AM4	AM5	AM6	AS2	AS3	AS4	AS5	AS6
	<int>	<int>	<int>	<int>	<int>	<int>	<int>	<int>	<int>	<int>	<int>
OTU.1	723	3634	0	2907	171	1956	2730	2856	4192	3797	3392
OTU.10	0	0	0	0	0	0	2223	0	0	1024	0
OTU.11	1892	0	0	0	82	113	0	0	0	0	0
OTU.12	0	1587	0	0	0	0	0	0	0	0	0
OTU.13	0	0	0	0	0	0	0	0	0	1472	0
OTU.14	0	0	0	0	0	0	0	0	0	0	1087

	AM1	AM2	AM4	AM5	AM6	AS2	AS3	AS4	AS5	AS6
OTU.1	723	3634	2907	171	1956	2730	2856	4192	3797	3392
OTU.10	0	0	0	0	0	2223	0	0	1024	0
OTU.11	1892	0	0	82	113	0	0	0	0	0
OTU.12	0	1587	0	0	0	0	0	0	0	0
OTU.13	0	0	0	0	0	0	0	0	1472	0
OTU.14	0	0	0	0	0	0	0	0	0	1087

	AM1	AM2	AM4	AM5	AM6	AS2	AS3	AS4	AS5	AS6
OTU.1	723	3634	2907	171	1956	2730	2856	4192	3797	3392
OTU.10	0	0	0	0	0	2223	0	0	1024	0
OTU.11	1892	0	0	82	113	0	0	0	0	0
OTU.12	0	1587	0	0	0	0	0	0	0	0
OTU.13	0	0	0	0	0	0	0	0	1472	0
OTU.14	0	0	0	0	0	0	0	0	0	1087

	Feature.ID	Taxon	Confidence
	<chr>	<chr>	<dbl>
1	OTU.1	d__Eukaryota;p__Chordata;c__Actinopteri;o__Scombriformes;f__Gempylidae;g__Thyrsites;s__Thyrsites_atun	1.0000000
2	OTU.2	d__Eukaryota;p__Chordata;c__Actinopteri;o__Mugiliformes;f__Mugilidae;g__Aldrichetta;s__Aldrichetta_forsteri	0.9999999
3	OTU.3	d__Eukaryota;p__Chordata;c__Actinopteri;o__Perciformes;f__Bovichtidae;g__Bovichtus;s__Bovichtus_variegatus	0.9999979
4	OTU.4	d__Eukaryota;p__Chordata;c__Actinopteri;o__Blenniiformes;f__Tripterygiidae;g__Forsterygion;s__Forsterygion_lapillum	1.0000000
5	OTU.5	d__Eukaryota;p__Chordata;c__Actinopteri;o__Labriformes;f__Labridae;g__Notolabrus;s__Notolabrus_fucicola	0.9996494
6	OTU.6	d__Eukaryota;p__Chordata;c__Actinopteri;o__Blenniiformes;f__Tripterygiidae;g__Forsterygion;s__Forsterygion_lapillum	1.0000000

	kingdom	phylum	class	order	family	genus	species	Confidence
OTU.1	Eukaryota	Chordata	Actinopteri	Scombriformes	Gempylidae	Thyrsites	Thyrsites_atun	1.0000000
OTU.2	Eukaryota	Chordata	Actinopteri	Mugiliformes	Mugilidae	Aldrichetta	Aldrichetta_forsteri	0.9999999
OTU.3	Eukaryota	Chordata	Actinopteri	Perciformes	Bovichtidae	Bovichtus	Bovichtus_variegatus	0.9999979
OTU.4	Eukaryota	Chordata	Actinopteri	Blenniiformes	Tripterygiidae	Forsterygion	Forsterygion_lapillum	1.0000000
OTU.5	Eukaryota	Chordata	Actinopteri	Labriformes	Labridae	Notolabrus	Notolabrus_fucicola	0.9996494
OTU.6	Eukaryota	Chordata	Actinopteri	Blenniiformes	Tripterygiidae	Forsterygion	Forsterygion_lapillum	1.0000000

Importing outputs into R¶

Import the frequency table¶

Import the taxonomy table (exported from QIIME 2)¶

Import the sample metadata¶

Import the phylogenetic tree¶

Create a Phyloseq object¶

Initial data inspection¶

Rarefy the data¶

Saving your work to files¶

Saving a graph to file¶

A data.frame: 12 × 8
	fwd_barcode	rev_barcode	forward_primer	reverse_primer	location	temperature	salinity	sample
	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>
AM1	GAAGAG	TAGCGTCG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	mudflats	12	32	AM1
AM2	GAAGAG	TCTACTCG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	mudflats	14	32	AM2
AM3	GAAGAG	ATGACTCG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	mudflats	12	32	AM3
AM4	GAAGAG	ATCTATCG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	mudflats	10	32	AM4
AM5	GAAGAG	ACAGATCG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	mudflats	12	34	AM5
AM6	GAAGAG	ATACTGCG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	mudflats	10	34	AM6
AS2	GAAGAG	AGATACTC	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	shore	12	32	AS2
AS3	GAAGAG	ATGCGATG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	shore	12	32	AS3
AS4	GAAGAG	TGCTACTC	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	shore	10	34	AS4
AS5	GAAGAG	ACGTCATG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	shore	14	34	AS5
AS6	GAAGAG	TCATGTCG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	shore	10	34	AS6
ASN	GAAGAG	AGACGCTC	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	negative	NA	NA	ASN

A data.frame: 6 × 8
	fwd_barcode	rev_barcode	forward_primer	reverse_primer	location	temperature	salinity	sample
	<chr>	<chr>	<chr>	<chr>	<chr>	<int>	<int>	<chr>
AM6	GAAGAG	ATACTGCG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	mudflats	10	34	AM6
AS2	GAAGAG	AGATACTC	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	shore	12	32	AS2
AS3	GAAGAG	ATGCGATG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	shore	12	32	AS3
AS4	GAAGAG	TGCTACTC	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	shore	10	34	AS4
AS5	GAAGAG	ACGTCATG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	shore	14	34	AS5
AS6	GAAGAG	TCATGTCG	GACCCTATGGAGCTTTAGAC	CGCTGTTATCCCTADRGTAACT	shore	10	34	AS6