Full Comp Micro Lab Manual

User Manual:

Open the PDF directly: View PDF PDF.
Page Count: 134 [warning: Documents this large are best viewed by clicking the View PDF Link!]

Computational Microbiology
Lab Manual (Biology
2003/3004)
Tonya Ward. Contributions by: Elise Morton, Gabe Al-Ghalith, Joseph Guhlin, Andrew Honsey, Jen
Teshera-Levye, Rachel Soble, & Maxwell Kramer
Getting Started
Overview of Computational Microbiology Workflow
During the computational microbiology section of this course you will use your own computer,
and the supercomputers at the Minnesota Supercomputing Institute (MSI) to analyze
microbiome data. Below is a diagram explaining which resources and programs will be found on
your computer and on MSI.
&
In order to analyze the microbiome datasets available, we must install some programs on your
computer. These programs are listed in the table below. Links for these programs are also
available on the course Moodle for your convenience.
## Warning: package 'knitr' was built under R version 3.3.2&
| Program | Use | Download | Cost |
|---|---|---|---|
| PuTTY | Connect to MSI (Windows only) | Mac: Not Needed. Windows: http://www.putty.org | free |
| FileZilla | Transfer files to/from MSI | Mac / Windows: https://filezilla-project.org | free |
| Sublime Text | Writing text files | Mac / Windows: www.sublimetext.com/3 | free |
| RStudio | Analyze & plot data | Mac and Windows: First: https://cran.r-project.org & www.rstudio.com/products/rstudio/download | free |
PuTTY
PuTTY allows people with Windows to access the computers on MSI from the command line. It
is a terminal application that we will use specifically for MSI. Mac and Linux users do not need
this, and will access MSI using their terminal instead.
FileZilla
FileZilla will let you connect your computer to the supercomputers at MSI
(https://filezilla-project.org). This is a point and click program with a visual user interface. You can install it and
we will set up the MSI-specifics later when we first log in to MSI.
Sublime Text
Sublime Text is a text editor (https://www.sublimetext.com/3). Just like we might use Microsoft
Word for writing our papers and assignments, we need a text editor to write the type of text
files we use in computational microbiology. We need a special text editor because the
computer will be reading in and processing our files directly, so they must be formatted
properly. We cannot use something like Word or Notepad because they embed special
modifications into the files they create. For the ease of troubleshooting, everyone is required to
use Sublime Text 3. This is a point and click (and type) program with a visual user interface. You
can install and use it right away.
RStudio
R is the programming language we will use to analyze and plot our data. We can think of it like
Microsoft Excel. R can do everything we would need to do in Excel and a lot more. To download
R, we will need to download it for your particular operating system. Choose your appropriate
link from here: http://cran.us.r-project.org/.
After this downloads, you will probably get two R icons which open a dialogue which looks
similar to the terminal. We will never be using these because it is kind of a clunky way to use R.
Instead we will be installing an Integrated Development Environment (IDE) for R called RStudio.
It works as a wrapper for R to create somewhat of a personalized console with different panes
containing different information about what you are working on. To download it go to this link
and choose the right installer for your operating system:
http://www.rstudio.com/products/rstudio/download/.
Accessing MSI
What is MSI?
MSI stands for the Minnesota Supercomputing Institute. It is an academic unit of the University
of Minnesota. They offer numerous services, such as batch high performance computing (HPC),
interactive HPC and data storage. For the context of this course, you can think of MSI as a
resource of many large computers that we can use to perform our computational tasks.
Serial versus Parallel Computing
When we use MSI we can complete our tasks faster if we run them in parallel. This is similar to
thinking about electricity. Look at the example below with light bulbs. On the left the circuit is
set up in series (bulbs one after another), on the right the circuit is set up in parallel (electricity
reaching all bulbs at the same time).
&
We can do the same type of thing with computers. Below is an example of serial computation.
When we run a command on MSI we are taking a big dataset and executing a bunch of instructions.
The instructions have to be done in a certain order and only one can be done at a time. If we do
this in series, it can take a long time. For example, if we want to align millions of DNA sequences
to a reference, also known as OTU picking (we will talk about what this is later in the course), it
takes a really long time if we want to do it accurately.
&
Below is an example of parallel computation. We can break up our problem into smaller chunks
and run them at the same time on multiple processors. This way we can get to our results
faster. In the example we are using OTU picking, but many of our computational microbiology
commands can be run in parallel.

So really, we use MSI so that we can quickly complete tasks that would take our own computers
a really long time.
Accessing MSI
If you want to use your personal computer, you would physically interact with the keyboard
and mouse attached to it. The computers at MSI are not located where we are, so we must
access them remotely. To understand how, we should first learn some terminology:
| Term | Definition |
|---|---|
| Client | The computer where the user is sitting and where the connection is initiated from |
| Server | The remote computer that accepts the connection and provides the service |
| IP Address | Internet Protocol address. Four 8-bit numbers that all computers connected to the internet have, telling us where/who the computer is. MSI: login.msi.umn.edu |
| SSH | Secure Shell. Enables command-line connection by creating an encrypted connection between your computer and the remote server |
To access MSI you must be using the UMN secure wifi, eduroam, or you must be logged into
the VPN.
If you don't have the VPN, follow the instructions on Moodle.
Logging In:
Mac/Linux:
Open your terminal and type (test):
ssh x500@login.msi.umn.edu
It will then ask for your password, which is your x500 password. It will not show up as you type
it.
Windows: open PuTTY:
First Time: Open PuTTY and do the following. Once you hit open, it will ask for your x500
password.

Every Other time: click on your biol1961 PuTTY shortcut.
Logout:
exit
Servers Available at MSI
When you first access MSI you are on the login server. This server can be used to look at which
files are there and make simple text files. To perform any other tasks, you must move to a
server that is capable of performing larger tasks. The server we will use is 'lab'.
Move to Lab Server
ssh lab
Directory Structure on MSI
We can see the exact directory structure of MSI using FileZilla. Generally it looks like this:
/home/
    biol1961/
        home_directory/
        shared/
        public/
Connecting to MSI with FileZilla
Type in the host IP for MSI: login.msi.umn.edu
Type in your username: x500
Type in your password
Click Quickconnect
&
&
Commenting Code
"Programs must be written for people to read, and only incidentally for machines to execute."
    -Structure and Interpretation of Computer Programs, 1985
What is commenting?
When we write and execute code we are telling the computer to do something. The computer
reads our code, interprets what to do and executes it. As people, we write comments in the
code for other people to read. These comments include explanations as to what the code is
doing, instructions, what the inputs and outputs should be and other important pieces of
information. Comments are not read by the computer. We ensure they are not read by the
computer by placing special symbols before the comment text that stop the computer from
reading that text and trying to interpret it. There are different types of comment symbols
depending on which language you are writing in. For this course, we will be using the following
symbol: #
The pound symbol, or hashtag, tells the computer to not read any text that follows the symbol.
The symbol is line specific, so once a new line is started the computer can again read the text.
Example:
# this line is a comment, not read by the computer
cd file_path/to/DNA_sequences
# the line above is NOT a comment and is read and executed by the computer
Summary: '#' = a comment for us, and not the computer.
Comments on Comments
Commenting is also important for later in the course when we begin to write our own code. You
will be expected to comment any code you write so that it can be interpreted by your
instructors and peers. For this class, please consider the following points:
1. The value of a comment is inversely proportional to the distance between the comment and the
code. Good comments stay as close as possible to the code they're referencing. As
distance increases, the comment becomes misleading.

2. Comments should be clear and concise.

3. Comments should capture intent. Because we are learning in this course, what we want
our code to do and what it actually does might be two different things. Commenting your
intent can help your instructors help you!
Adapted from https://blog.codinghorror.com/when-good-comments-go-bad/
The Command Line
Why Command Line?
We need to be able to interact with the Minnesota Supercomputing Institute's (MSI)
supercomputers. Some of our computers work with the Windows or Mac OS systems, while
the supercomputers at MSI run UNIX. To work with UNIX, you have to learn how to enter
commands from the terminal to tell the computer what to do. This will likely start out as trial
and error for you, so don't get discouraged!
You should try these commands and combinations of these commands to get a better
understanding. Repetition is essential to learning these commands and how they work. Much
of working with the command-line is memory. So copy-and-pasting commands is discouraged.
Typing them on the command-line yourself will accelerate your learning and the command line
provides instant feedback. If you make a mistake, you can use the up arrow on the keyboard to
correct your command, or you can simply re-type it and try to correct your error.
When working from the command line, the case of the letter is important. Lowercase 'a' does
not equal uppercase 'A'. So not only do you have to be careful of typos, you also have to be
careful of the case of the letters!
Please note that in this manual command line text will be in a different font. Inputs (commands)
that you can actually try out will be bolded. With the new font, you should be able to tell the
difference between many common characters. Characters such as 0 and a capital O are distinct.
As are lower-case l and the number 1. The pipe | and l and 1 are also different. This font is
called Monaco.
Additional Help
The following website can help with learning UNIX. You can type in a command, with options,
and it will provide a description of that command and any options you supplied. Try it with
ls -a
Help website: http://www.explainshell.com/
Commands
A command in UNIX/Linux can be as simple as ls. But it can also be more advanced and have flags,
affecting how the program is run, and arguments, telling the program where to work or which
file to work with, depending on the command. Let's consider the following command:
ls -a ~/../sample/
This command lists the files and directories, including hidden ones, in the sample directory.
&
Notice the following:
• The command, flags, and argument are all separated by a space
• The first set of characters before any space occurs are the command (ls). If we forgot a
space, and typed ls-a, the computer would try to run a command that is literally named
'ls-a' which does not exist, and you will get command not found.
&
Anatomy of a command (as it looks in the terminal):
tward@login02 [~] % ls -a test_directory
• ls is the command itself
  ◦ ls stands for list. It tells the computer to list the contents of something
• -a is a flag to the command, affecting how the program functions
  ◦ Flags modify the command. -a means all, and is telling the computer to list all files
    (even hidden ones)
• test_directory is the argument to the command
  ◦ It is telling the computer what to list: list all the files in the directory
    ~/Desktop/biol1961/file/
• tward@login02 [~] % is the terminal output
  ◦ It tells us where we are (logged in under tward, in the login server login02). What's
    inside the [] tells us which directory we are in (~ means "home"), and % tells you
    when the command starts. We will leave this part out for all future examples.
&
Commands may have zero, one, or more flags and zero, one, or more arguments. For
example, the cp command which copies a file will always have two arguments, but rarely uses
flags.
cp test_directory/sample.fastq test_directory/my_sample.fastq
• cp is the command itself
• test_directory/sample.fastq is the first argument, the name and location of the file to
be copied
• test_directory/my_sample.fastq is the second argument, the name and location to
copy the file to
This will copy the sample.fastq file to the same directory, and the name of the new file will be
my_sample.fastq.
Where Are We Working?
• When working from the command line, we first start in our home directory (a directory is
another name for a folder)
• From our home directory we can reference other directories or paths to files
• Commands are performed in the directory you are in
• When you change directories (cd) you move from one location to another
• Commands default to working in your current directory, but you can tell them to work
somewhere else using a specified path
&
Think of your own computer. When you open your "Finder" (Mac) or Windows Explorer
(Windows), you start in your home directory. From there you have other directories you can
enter. Inside those directories you might have even more subdirectories. Let's use an example
of a pictures folder on your desktop. If you want to access Picture_1 from 2014, you would go
to your Desktop, then into Pictures, then into Pictures_2014 and click picture_1.
&
Your Home Directory
Your home directory is your default starting place when you log in to MSI. On MSI, my home
directory is '/home/biol1961/tward'. I can check this by opening my terminal, logging in to MSI,
and typing pwd (print working directory).
pwd
/home/biol1961/tward
I can also test this by using cd ~. With this we move to our home directory (cd = change
directory, ~ = home). We then check what directory we are in with pwd (print working
directory).
cd ~
pwd
/home/biol1961/tward
We can also check using echo, which will print the value of ~ to the screen.
echo ~
/home/biol1961/tward
Changing Directories
• The command cd changes directories
• Use cd ~ to change to your home directory
• Use cd .. to move into the parent directory (up one from where you are)
• Use cd shared to move into a directory called 'shared'
cd ~/../shared/
means to change directories to:
1. My home directory using ~ (which is /home/biol1961/tward)
2. Go up to the parent directory using ../
3. And then to the shared directory using shared/
If I want to move back to my home directory from here, I can type:
cd ~
which means to move to my home directory. Note that cd ~ will bring you home no matter
where you are.
Try moving around on MSI from your home directory to the main biol1961 directory, to the
shared directory, to the public directory...
Telling Commands Where Files Are
• Commands & programs work from where you currently are (see: pwd)
• When telling commands how to get somewhere, you can give a start position, such as in
the following command:
cp ~/../shared/File.txt ~
This tells the cp command to start at your home directory (~), and then move into the parent
directory (..), and into the shared folder and copy the File.txt file to your home directory
(~).
Our Biology 1961 directory structure should look like this:
home/
    biol1961/
        x500/
        shared/
        public/
If we want to work from our home directory we can reference it, regardless of where we
currently are, like this /home/biol1961/x500. Note: a slash in front means the absolute path
from the root of the directory tree (home).
Relative path: biol1961/x500
Absolute path: /home/biol1961/x500 or ~
**remember ~ is short for your home directory.
Absolute versus Relative File Paths
Notice that in comparison to what we talk about above, here we are talking about referencing
the path to a file, not the path itself. But the rules are still the same. An absolute file path is the
file's address from the root of the computer's file system. For example:
/home/biol1961/x500/my-file.txt
is an absolute file path, and will work from any directory. A relative file path could be
my-file.txt
The command receiving this file path will only work if that file is in the same directory the
command is running from. Another relative file path could be:
../my-file.txt
This example would only work if the file is backward one directory (parent directory) from your
current directory. You'll get a "No such file or directory" error when you have not referenced
the location of the file properly. Relative file paths are used to reduce typing and make things
easier. Absolute file paths will almost always work. Remember, the way to tell a relative path
from an absolute path is by a leading forward slash /.
Listing Files and Directories
• The command ls will allow you to list files and directories
• The default location it looks is your present working directory, which you can identify with
the command pwd
• You can also specify other locations to examine; by using ls ~/sample_directory/ you
are asking to list files and directories found in the sample_directory folder
• You can list hidden files by using the flag -a and you can see file size, date, modification,
and permissions by using -l, you can get file sizes in human readable format with -h
• You can combine these, and use ls -lah ~/Desktop/ to find out this information for files
in the Desktop folder
ls -lah /home/biol1961/tward/sample_directory&
total 1.0G&
drwxr-s---. 3 tward biol1961 4.0K Oct 3 13:04 .&
drwxr-xr-x. 55 root biol1961 12K Sep 6 09:30 ..&
-rw-r--r--. 1 tward biol1961 3.1K Sep 23 12:04 alpha_div.txt&
-rw-r--r--. 1 tward biol1961 0 Sep 27 13:39 alpha_otu.txt&
-rw-r--r--. 1 tward biol1961 0 Sep 27 13:39 alpha_taxa.txt&
-rw-------. 1 tward biol1961 5.8K Oct 3 13:02 .bash_history&
-rw-r--r--. 1 tward biol1961 403 Oct 3 13:04 .bashrc&
-rw-r--r--. 1 tward biol1961 2.4M Sep 28 11:14 Gever_100.biom&
Try listing all of the files in your home directory
Tab Complete
If you want to write a path or file path, you can use the tab button to help you. Let's say we
want to type the following /home/biol1961/tward/sample_directory/my-file.txt. If we
start typing the first couple of letters /home/biol1961/twa, we can then use tab to complete
the rest for us. If there is only one option, tab will fill in the word. If there is more than one
option, hitting tab once will not fill in the word, but hitting it twice will list all our options.
Try writing the path to your desktop by using the tab button.
Moving/Copying Files
To move files, use mv. To copy files, use cp. If we are in the shared/ directory, this command
moves a file called seq.txt from the shared directory to our home directory.
mv seq.txt ~
To copy the same file, we would have used cp instead and we would have a copy in both
locations.
Try copying a file named copy_me.txt from the shared/ directory into your home
directory.
Creating a Directory
To create a new directory, use mkdir.
In this example, we use pwd to show we are in a new directory on the desktop. We use ls to
show that there are no files or directories in the current directory. We then create a new
directory with mkdir, and can now see it with ls, and then we change into that directory with
cd.
pwd
/Users/tward/Desktop/new_directory

ls
mkdir new_sub_directory

ls
new_sub_directory

cd new_sub_directory/
Try making a new directory in your home directory. Try copying a file from the shared/
folder (copy_me.txt) into the new directory you made.
Counting Lines, words and characters
In computational microbiology we will be using multiple types of text files. Some of these files
will include sequences of DNA (all the A,T,G,C's), tables of how many bacteria are in each
sample, and other files with information about diversity. Sometimes it is handy to be able to
pull snippets of information from these files. For example:
• we can count the number of lines, words, and characters in a file with the command wc
• "Words" are defined here by groups of characters (strings) separated by a space.
In this example, we get only the number of lines in the my_file.txt file using the -l flag with wc.
Next, we get the number of lines, words, and number of characters in the file when we don't
use the -l flag.
wc -l ~/my_file.txt
552544 /home/biol1961/tward/my_file.txt

wc ~/my_file.txt
552544 552544 90114322 /home/biol1961/tward/my_file.txt
Try counting the number of lines in the copy_me.txt file
Getting lines from a file (beginning or end)
• To get lines from the beginning of a file or end of a file, we can use the commands head or
tail
• By default, these commands will give us 10 lines, but we can change this with the flag -n K
where K is the number of lines you would like to receive
Here is an example, searching a file called sample.fastq. This file contains all the sequences of
DNA for a sample in a dataset. So, the top 2 lines from a file would be head -n 2
sample.fastq
head -n 2 ~/Desktop/biol1961/sample.fastq&
@M00784_000000000-A8BPP:1:1101:14310:1364#0/2&
CGACAACCATGCATCACCTGTCACTTCTGTCCCCGAAGGGAAAAATGCGATTAGGCATCGGTCAAAAGGATCTCACC
CTTCGCTCATCTTCTTCGCGTTGCTTCTAATTCCACCACATGCTCCCCTACTTCTCCGCCTCCCCCTCACTTCCTTT
GAGTTTCACTCTTGCGAGCGTACTTCCCAGGCGTAGTACTTAATGCTTTCGCTGCGCCACCGTCGCGCTTCCCCCCC
CACCCCTCCTTCCCATCTTTTCCTCCCTCCCCCTCCCGCGTCTCCCATCCCCCTCCCCTTCTCCCCCACC&
Searching a File
• We can quickly search files with the grep command.
• To use grep you supply a string to search for, followed by a file to search in
• grep will send the result to standard output (typically the screen), which is the entire line
that matches the search string you've provided.
Here are some examples, searching sample.fastq. This file contains all the sequences of DNA
for a sample in a dataset.
grep GGGGGGATGAT ~/Desktop/biol1961/sample.fastq&
CGACGGCCATGCAACACCTCCACAGGCGCCCCGAAGGGCCTCATCATCTCTGAAACATTCGCCTACAGTTCAAGCTC
CGGTAAGGTTCCTCGCGTATCATCCAATTAAACCCCCAGTTCCTCCGCTTTTGCCGGCCCCCGTCAATTCCTTTGAG
GTTCTACCCTGCCGGCGTACTCCCCCGGGGGGATGATTCATGCCTTCGCTTGGCCGCTTACGACAGACGCAACCAAC
GATCAACCATCATTTACGGCGTGCACTACACGGCTCACGATTCTCACTCCTCTCATCTATCACCACTCCC&
CGACAACCATGCAGCACCTGTATCAGTATCCCCGAAGGGACTATGTAACTTTACAGGAATTACTGGAAGGCAAGACC
TGGGAAGGGTCCTCGCGTTGCTACGAAATAAAACAAAAGCTCCGCAGCCTGTGCGGGCCCCCGTCAATTACATTGAG
GTTCAAACTTGCGGCCGTACTCACCAGGGGGGATGATTAATGTGTTTACTTCGGAAAAGAAGGGGTCGATACCCAAT
ACACCTAGCAGCAATCGTTTACAGTGTGGACTACAAGGGTATCTAGTCACCTGTATCTTATACAAATCTG&
CGACAACCATGCAGCACCTGCAAAGAGAGTACGAAGGAAGAGATAGTATTCAAAAGGGGCCACTGCAATTCAAGCAC
GGGGAAGGGTCCTCGGCGATCATTGAATTAAAACACATGGTCCTACGGTTGTGACGGGCCCCGTCAATTTCTTTGAG
GTTCACTGTTGCCGGAGTTATCCCCAGGGGGGATGATTAATGATTTTGCTGGGCCGCTCGAATGGTCTGGACAACAC
AGGGACTCGACATTATACGTTGAGGCGTGCCAGGGACACGAACACACGGTCATTTGTCATCAACACACCC&
The lines returned are very long and are printed on multiple lines. However, they are the same
length and you can see we find three lines that contain GGGGGGATGAT
Redirecting command output (to another command)
• We can use the output of one command as the input of another
• The pipe, | , is a key typically above the enter key. It is a vertical line
• The pipe redirects output from one command to input of another command
For example, we can see how many lines in the sample.fastq file contain the sequence
'GATTACA'. grep finds the lines in which 'GATTACA' appears in the file, and feeds them to
wc to count the number of lines.
grep GATTACA ~/Desktop/biol1961/sample.fastq | wc -l
151
We can also see the last 2 lines that match GATTACA:
grep GATTACA ~/Desktop/biol1961/sample.fastq | tail -2&
CGACGGCCATGCACCACCTCGGCCTCCGTCCGAAGAGCCACCCATCTCTGGGTGTTTCAGGCGCCGTTCGAGCCCGT
GTAAGGTTTCTTGCGTTTCATTGAATTTAACCACCTGTTTCTACGCCTGTTCGGGCCCCCCTCCAATTCCTTGAGGT
TTCACGCTTCCGATGTTCCTCCCAGGTGGATGTACTATTGCTGTCGCCTGGGCACCGACAGGGTTCCGCCGGCGGAC
ACCCATTATTCCTTGTTGAGTGGATTACATGGCAAGCTAATCACCCGTCTGTGTCTCTTCACACTCGCTC&
CGACGGCCATGCAACACATGTTTTCATGTCCCCGAAGGGAAAGCTCCATCTCTGGAGCGGTCAATCAATGTCAAGCC
TTGGTAAGGTTCTTCGCGTTGCGTCGAATTAAACCACATACTCCACCGCTTGTGCGGGCCCCCGTAAATTCCTTTGA
GGTTCATCCTTGCGGACGTACTCCCCAGGCGGGGTACTTATTGCGTTAACTCCGGCACAGAAGGGGTCGATACCTCC
TACACCGAGTACCCATCGTTTACGGCAAGGACTACCGGGGATTACAACTCCCTGTCGCCTCTACCAATCT&
We can also string multiple grep commands together, for example we could search for lines
containing 'ATG' that also contain 'TAG' that also contain 'GATTACA', and count the number of
matches.
grep ATG ~/Desktop/biol1961/sample.fastq | grep TAG | grep GATTACA | wc -l&
138&
Redirecting command output (to a file)
• We can also redirect the output of a command to a file
• Output can be directed to a new file with > (this will replace existing content if something
is already there!)
• You can add to the end of a file with >> (this will NOT replace existing content, but adds to
it instead)
We can use the same example as above, where we used grep to search for lines containing
'ATG' that also contain 'TAG' that also contain 'GATTACA', and direct the output to a file called
'many_grep.txt'.
grep ATG ~/Desktop/biol1961/sample.fastq | grep TAG | grep GATTACA | wc -l >
many_grep.txt
We can write direct text to a file using echo. For example, we can write "the number of lines
that contain ATG, TAG and GATTACA:" to the end of the many_grep.txt file.
echo "This is the number of lines that contain ATG, TAG, GATTACA" >>
many_grep.txt
Try writing "I will master computational microbiology" to a new file called 'mantra.txt' on your
desktop.
Exploring .txt files from terminal
• We can open and look at text files from the command line with the command nano
• nano file.txt opens the file
• Arrow keys move up and down
• Directions for quitting the file are located at the bottom of the screen
We can explore what the sample.fastq file looks like by typing the following command,
and then quitting by typing Ctrl+X.
nano sample.fastq
Try looking into your mantra.txt file on your Desktop.
For more commands you can use, please see the Bash_commands file on Moodle. Please use
this file and update it with commands you find useful as you complete the computational
microbiology section.
Modifying '.bashrc' on MSI
The following instructions are for modifying your umask setting on MSI. This setting is listed
within your .bashrc file. It controls who has access to the files you create on MSI. The default
setting is 077, which means all files and directories are private. We want to set it to 027, so that
people in our group (e.g., TAs and instructors) can have access to the files you create.
1. Log onto MSI
ssh x500@login.msi.umn.edu
PuTTY users: Just click on your MSI PuTTY shortcut
2. Copy the .bashrc file from the shared directory to your home directory
cp /home/biol1961/shared/.bashrc ~
3. Source your file to make it active
cd ~
source .bashrc
Job Submission on MSI
Why submit jobs
MSI uses job queues to efficiently and fairly manage when computations are executed. The
queuing system that MSI uses is called PBS, which stands for Portable Batch System. We
submit a script to the supercomputer to be run at a later time (when the resources are
available). This is the alternative to waiting around for hours, potentially longer, for the
resources you need to become available.
What is a script?
A script is a file containing commands to be run in order. The script itself can then be run like a
command and will execute all the tasks outlined in the file. There are many types of scripts. For
example, all the commands we will run in QIIME are scripts that execute many tasks for us.
A PBS job script is a type of script we use with the MSI supercomputers. It is a small plain text
file containing information about what resources a job requires - including time, number of
nodes and memory. The PBS script also contains the commands needed to begin the desired
computation.
Below is an example of a PBS script that we will use repeatedly. For our purposes, the text in
black will remain the same for all jobs submitted to MSI. The text in red will vary, depending on
the user and the job to be submitted.
#! /bin/bash -l&
#PBS -l nodes=1:ppn=16,mem=2Gb,walltime=3:00:00&
#PBS -m abe&
#PBS -M your_email&
#PBS -o job_name_stdout&
#PBS -e job_name_stderr&
cd /home/biol1961/x500&
module load name_of_software&
command_X_Y_Z&
#!/bin/bash -l&
The first line in the PBS script defines which type of shell the script will be read with. We need
the BASH shell, and the -l flag makes it start as a login shell so that your usual environment is loaded.
#PBS -l nodes=1:ppn=16,mem=2Gb,walltime=3:00:00
The second line contains the PBS resource request. The sample job will require 3 hours, 1 node
with 16 processor cores per node (ppn), and 2 gigabytes of RAM (mem). Note that
commands for the PBS queuing system begin with #PBS. Other commands to be run do not
have the '#PBS'.
#PBS -m abe&
#PBS -M your_email&
The third and fourth lines are both commands having to do with sending message emails to the
user. The first of these lines instructs the PBS system to send a message email when the job
aborts, begins, or ends. The second command specifies the email address to be used.
#PBS -o job_name_stdout&
#PBS -e job_name_stderr&
The fifth and sixth lines specify the names of the files to which the job's output and errors
should be written, respectively. You can change the name (job_name) to specify the job that is
run (e.g. OTU_picking_stdout).
cd /home/biol1961/x500
A PBS script should also contain the appropriate change directory commands to get to the job
execution location (in this case the computer will move to the user's home directory).
module load name_of_software
The script also needs to contain module load commands for any software modules that the
calculation might need. 'Module load' is effectively the same as opening an application on your
computer. This could be something like: module load qiime/1.8.0
command_X_Y_Z
The last lines of a PBS script contain commands used to execute the job. This could be
something like: pick_otus.py -i sequences.fasta -o otus.
You must write your PBS scripts in your plain text editor (Sublime Text 3) and save them with
the extension .pbs
Check+a+Job+Status+
There are two commands used to check the progress of our job: qstat and showq. To see only our
own jobs, we need to add the -u username flag. If no flag is given to qstat, it will
show all of the jobs currently running or waiting on the specified machine. showq will show
more information about the jobs, including the starting time, expected finishing time, and
usage of computational resources (processors, nodes).
qstat -u x500&
showq -u x500&
Kill+a+Job+
'93#&<3&A93AP&+93&U"-&/+%+./5&+93(3&,/&%&:"-&JG&/+%(+,#H&<,+9&%&#.F-3(N&!9,/&#.F-3(&A%#&-3&
./3)&+"&P,22&+93&U"-N&S"(&3D%F;235&,I&+93&:"-&JG&,/&]VlxN#")3dc^dN2"A%2)5&+$;3&
qdel 4327&
"(&
qdel 4327.node1081.locald&
'3&A%#&P,22&F.2+,;23&U"-/&,#&%&("<&-$&/;3A,I$,#H&+93&:"-&JG/5&"(&./3&all5&,I&%22&+93&U"-/&#33)&+"&-3&
P,223)N&
QIIME+
What+is+QIIME?+
QIIME stands for Quantitative Insights Into Microbial Ecology. It is pronounced "chime". It is a
pipeline for performing microbiome analysis from raw DNA sequences. Some of the things
QIIME can do for us include:
p h.%2,+$&I,2+3(,#H&
p \![&;,AP,#H&
p 7//,H#,#H&+%D"#"F$&
p G,?3(/,+$&%#%2$/,/&
p a,/.%2,M%+,"#/&
p B+%+,/+,A/&
hJJ41&./3/&%&F,D&"I&"+93(&3D,/+,#H&/"I+<%(3/&%#)&%2H"(,+9F/&+"&;3(I"(F&,+/&+%/P/N&O3A%./3&"I&
+9,/&<3&A%22&,+&%&`<(%;;3(`N&!9%+&F3%#/&,+&<(%;/&.;&F%#$&"+93(&3D,/+,#H&+""2/&%#)&%2H"(,+9F/&,#&%&
;%AP%H3&+9%+&<"(P/&%/&"#3&A"93/,?3&.#,+N&
How+Do+We+Use+QIIME?+
7/&F3#+,"#3)&%-"?35&hJJ41&,/&%&<(%;;3(&I"(&F%#$&),II3(3#+&A"F;"#3#+/N&!9,/&F3%#/&,#/+%22,#H&
hJJ41&A%#&-3&3D+(3F32$&A9%223#H,#H&-3A%./3&,+&(3Z.,(3/&47QY&)3;3#)3#A,3/&K"+93(&;("H(%F/&
%#)&%2H"(,+9F/LN&S"(&+9,/&(3%/"#&K%#)&+93&A"F;.+3(&;"<3(&"I&4BJL5&<3&./3&+93&dN^Nc&?3(/,"#&"I&
hJJ41&,#/+%223)&"#&4BJN&
'93#&<3&%(3&"#&4BJ&%#)&2"HH3)&,#+"&+93&2%-&/3(?3(5&<3&A%#&+.(#&hJJ41&`"#`&-$&+$;,#H0&
module load qiime/1.8.0&
module load&F3%#/&".+&"I&%22&+93&F").23/&K;("H(%F/&,#/+%223)&"#&4BJL5&2"%)&+93&"#3&<3&%(3&
H",#H&+"&/;3A,I$N&!93(3&%(3&),II3(3#+&?3(/,"#/&"I&hJJ41&"#&4BJ5&/"&<3&F./+&/;3A,I$&+9%+&<3&<%#+&
?3(/,"#&dN^Nc&<,+9&qiime/1.8.0N&JI&$".&<%#+3)&+"&/33&%22&+93&),II3(3#+&F").23/&%?%,2%-23&"#&4BJ&
$".&A".2)&)"&/"&<,+9&module availN&JI&$".&<%#+3)&+"&+.(#&"II&%&F").23&$".&9%)&2"%)3)5&$".&
A%#&+$;3&module unload&I"22"<3)&-$&+93&#%F3&"I&+93&F").23N&
hJJ41&9%/&F%#$&A"FF%#)/&KI,23/&+9%+&A"#+%,#&"()3(3)&2,/+/&"I&A"FF%#)/&+"&-3&(.#L5&%#)&+93/3&
A"FF%#)/&A%#&-3&I".#)&%+&+93&2,#P&-32"<N&!9,/&,/&9"<&<3&A"F;23+3&),II3(3#+&/+3;/&,#&".(&
%#%2$/,/N&
[FF3P9/#&&%,.(M9B00NMLLZ66&)O#"$L(4"6N0(L&
!($&H",#H&+"&+9,/&;%H3&%#)&A2,AP&/"F3&"I&+93&A"FF%#)/&2,#P/5&I"(&3D%F;23&A2,AP&
summarize_taxa.py&%#)&(3%)&<9%+&+9,/&A"FF%#)&)"3/5&<9%+&+93&,#;.+/&%(3&%#)&+93&3D%F;23/N&
Q"+,A3&+9%+&%22&A"FF%#)/&3#)&,#&`ON2`N&!9,/&,/&-3A%./3&%22&+93&A"FF%#)/&%(3&<(,++3#&,#&%&
2%#H.%H3&A%223)&!20B#,5&%#)&A"FF%#)/&,#&X$+9"#&3#)&,#&`ON2`N&
QIIME+Workflow+
=3(3&,/&%&I2"<A9%(+&"I&+93&9"<&hJJ41&<"(P/0&
&
Q"+,A3&+93&,#,+,%2&,#;.+/&,#&H(33#0&
F,N10&
*)D6,606#,&
;)Z1),46,$9
Q10N10&
!9,/&,/&%22&"I&+93&(%<&GQ7&/3Z.3#A3/&I("F&+93&/3Z.3#A3(&KNI%/+Z&I,23/L&
3)0%.%0%&
!9,/&,/&%#"+93(&<"()&I"(&&%NN6,$9D6-)N&J+`/&%&+%-8)32,F,+3)&+3D+&I,235&<93(3&+93&
/%F;23&JG/&%(3&("</&%#)&+93&A"2.F#/&%(3&),II3(3#+&A%+3H"(,3/&"I&)%+%N&S"(&
3D%F;235&<9,A9&;(,F3(/&<3(3&./3)&I"(&/3Z.3#A,#H5&<9,A9&-")$&/,+3&+93&/%F;23&
,/&I("F5&A2,#,A%2&)%+%&K2,P3&,I&+93&/%F;23&A%F3&I("F&%&;3(/"#&<,+9&+93&),/3%/3&"(&%&
A"#+("2L5&3+AN&4%#$&+,F3/&<3&%A+.%22$&F%P3&+93/3&I,23/&,#&1DA325&%#)&3D;"(+&+93F&
%/&N+D+&I,23/N&
!93&(3/+&"I&+93&/+3;/&,#&+9,/&;,;32,#3&<,22&-3&A"?3(3)&,#&)3+%,2&+9(".H9".+&+93&(3/+&"I&+93&A".(/3N&
Running+Commands+
!"&)3+3(F,#3&9"<&+"&(.#&%&A"FF%#)5&<3&9%?3&+"&2""P&.;&+93&)"A.F3#+%+,"#N&'3&A%#&3,+93(&H"&
+"&+93&A"FF%#)&;%H3&KI"(&hJJ41L&F3#+,"#3)&%-"?3&%#)&A2,AP&"#&+93&A"FF%#)&<3&<%#+5&"(&<3&
A%#&+$;3&+93&I"22"<,#H0&
qiime_command.py -h&
O$&/;3A,I$,#H&-h&<3&%(3&/%$,#H&`=1>Xm`N&!93&A"FF%#)&<,22&2,/+&+93&)"A.F3#+%+,"#&%//"A,%+3)&
<,+9&,+N&!9,/&".+;.+&<,22&#"+&-3&%/&A"F;(393#/,?3&%/&<9%+&,/&%?%,2%-23&"#2,#35&-.+&<,22&%+&23%/+&+322&
./&%22&+93&;"//,-23&,#;.+/&%#)&".+;.+/N&
Example+
collapse_samples.py&
Online:+
&
!9,/&+322/&./&<9%+&+93&A"FF%#)&)"3/0&
[/3/&+93&F%;;,#H&I,23&+"&A"22%;/3&+93&\![&+%-23N&
The minimum we need to specify (required) is:
• The OTU table we care about in .biom format
• The mapping file we care about
• The output file path for our collapsed OTU table
• The output file path for our collapsed mapping file
• The field we would like to collapse by
Optionally, we can also determine:
• The collapse mode
• To normalize or not
Terminal:+
collapse_samples.py -h&
!9,/&<,22&+322&./&KF"(3&"(&23//L&+93&/%F3&,#I"(F%+,"#5&-.+&;(,#+/&,+&+"&+93&+3(F,#%2&/A(33#N&
Quality+Control+of+Sequence+Data+
'93#&<3&H3+&".(&GQ7&/3Z.3#A3/&I("F&+93&/3Z.3#A3(5&+93(3&,/&/"F3&Z.%2,+$&A"#+("2&+9%+&F./+&
-3&)"#3N&S"(&3D%F;235&+93&-%(A")3/&%#)&;(,F3(/&./3)&,#&+93&/3Z.3#A,#H&(3%A+,"#&/9".2)&-3&
(3F"?3)N&72/"&%#$&/3Z.3#A3/&+9%+&%(3&+""&2"<&,#&Z.%2,+$&/9".2)&-3&),/A%()3)N&
JI&<3&%(3&%#%2$M,#H&dfB&%F;2,A"#&)%+%5&+93#&<3&/9".2)&%2/"&+(,F&".(&/3Z.3#A3/&+"&+93&3D;3A+3)&
%F;2,A"#&/,M3N&JI&<3&./3)&;%,(383#)&/3Z.3#A,#H&<3&A%#&%2/"&/+,+A9&".(&;%,(/&+"H3+93(&+"&F%P3&
".(&/3Z.3#A3/&2"#H3(&%#)&"I&9,H93(&Z.%2,+$&I"(&%2,H#F3#+N&
We can trim and filter our raw sequences using many different tools, but in this course we will
be using SHI7 (pronounced shizen). This program will do all the quality control we need and
produce a final combined sequence file that we will use as the input for OTU picking (to
determine which bacteria are in our samples). The input that SHI7 requires is a directory of
.fastq files, where each sample has its own fastq file.
What's+a+fastq+file?+
7&.fastq&I,23&A"#+%,#/&".(&GQ7&/3Z.3#A3/&%/&<322&%/&"+93(&,#I"(F%+,"#&(3H%(),#H&+93&Z.%2,+$&"I&
+93&/3Z.3#A,#H&(3%A+,"#N&1%A9&/3Z.3#A3&<,+9,#&%&.fastq&9%/&I".(&2,#3/&"I&,#I"(F%+,"#0&
p >,#3&d&-3H,#/&<,+9&%&`_`&A9%(%A+3(&%#)&,/&I"22"<3)&-$&%&/3Z.3#A3&,)3#+,I,3(&%#)&%#&";+,"#%2&
)3/A(,;+,"#&
p >,#3&l&,/&+93&(%<&/3Z.3#A3&23++3(/&K75!565*NNNL&
p >,#3&V&-3H,#/&<,+9&%&`y`&A9%(%A+3(&%#)&,/&";+,"#%22$&I"22"<3)&-$&+93&/%F3&/3Z.3#A3&
,)3#+,I,3(&
p >,#3&]&3#A")3/&+93&Z.%2,+$&?%2.3/&I"(&+93&/3Z.3#A3&,#&>,#3&l5&%#)&A"#+%,#/&+93&/%F3&
#.F-3(&"I&/$F-"2/&%/&23++3(/&,#&+93&/3Z.3#A3&
What's+a+fasta+file?+
7&.fasta&"(&.fna&I,23&A"#+%,#/&".(&GQ7&/3Z.3#A3/&"#2$N&1%A9&/3Z.3#A3&<,+9,#&%&.fasta&9%/&
+<"&2,#3/&"I&,#I"(F%+,"#0&
p >,#3&d&-3H,#/&<,+9&%&`w`&A9%(%A+3(&%#)&,/&I"22"<3)&-$&%&/3Z.3#A3&,)3#+,I,3(&%#)&%#&";+,"#%2&
)3/A(,;+,"#&
p >,#3&l&,/&+93&(%<&/3Z.3#A3&23++3(/&K75!565*NNNL&
!93&.fasta&"(&.fna&I"(F%+&,/&+93&,#;.+&I"(F%+&(3Z.,(3)&I"(&\![&;,AP,#HN&'93#&<3&Z.%2,+$&
A"#+("2&".(&/3Z.3#A3/&<3&%2/"&A"#?3(+&+93F&I("F&.fastq&+"&.fnaN&
How+Do+We+Quality+Control+Sequences?+
B=Jx&,/&A.((3#+2$&,#/+%223)&,#&+93&shared&),(3A+"($5&%#)&".(&.bashrc&I,23&A"#+%,#/&,#I"(F%+,"#&+"&
9%?3&4BJ&./3&+9,/&;("H(%F&%/&,I&,+&<3(3&,#/+%223)&%/&%&F").23N&!93&I.22&)"A.F3#+%+,"#&I"(&B=Jx&,/&
2"A%+3)&93(30&9++;/0RRH,+9.-NA"FRP#,H9+/82%-R/9,xN&
!"&(.#&B=Jx&;,AP,#H5&<3&<,22&./3&+93&F%,#&B=Jx&A"FF%#)&<,+9&+93&I"22"<,#H&;%(%F3+3(/0&
shi7.py &
-i directory_with_fastqs/ &
-o qc_reads_output&
!93&,#;.+&I,23&;%+9&,/&+"&+93&/3Z.3#A3/&$".&<%#+&+"&;("A3//&K8,LN&!9,/&/9".2)&-3&%&),(3A+"($&<,+9&
"#3&I%/+Z&;3(&/%F;23N&!93&#%F3&"I&3%A9&I%/+Z&/9".2)&-3&+93&/%F;23&JG&I"22"<3)&-$&+93&.fastq&
I,23&3D+3#/,"#N&!93&".+;.+&),(3A+"($&,/&<93(3&$".&<%#+&$".(&I,#%2&A23%#&/3Z.3#A3&I,23&+"&-3&K8"LN&
There are some optional parameters to use depending on the dataset. They include:
-SE # This will use single-end mode. &
# If you don't have paired reads, use -SE&
-trim_q 32 # Trim sequences based on quality, default is 20, &
# increase to 32 if sequencing run is old (before 2015)&
--adaptor Nextera # You can specifically take out the adapter that was used &
# In most recent sequencing it's Nextera adapters&
--strip_underscore T # You can process the file names to keep the first part&
S"(&+93&I.22&2,/+&"I&";+,"#/5&$".&A%#&+$;30&
shi7.py -h &
!9,/&,/&+322,#H&B=Jx&+"&;(,#+&+93&932;&;%H3N&
!"&(.#&+9,/&A"FF%#)5&<3&F./+&/.-F,+&%&U"-&I,23N&Y".(&U"-&I,23&/9".2)&2""P&2,P3&+9,/0&
#!/bin/bash -l&
#PBS -l nodes=1:ppn=16,mem=2Gb,walltime=6:00:00&
#PBS -m abe&
#PBS -M x500@umn.edu &
#PBS -o job_name_stout&
#PBS -e job_name_stderr&
&
cd /home/biol1961/x500&
&
module load python&
&
shi7.py &
-i directory_with_fastqs/ &
-o qc_reads_output&
Of course, you have to modify the file to specify YOUR file paths and the outputs to what you
want. Things that cannot change include:
• The first 3 lines. The nodes and ppn must be 1 and 16 for the lab queue
• The module load python line (SHI7 requires python to run)
722&+93&;%(%F3+3(/&I"(&+93&%A+.%2&A"FF%#)&&1(09<)9%--9#,9#,)9-6,)96,90B)9^#<9D6-)5&<,+9&%&/;%A3&
-3+<33#&+93&;%(%F3+3(&23++3(&%#)&?%2.3N&J#&+93&%-"?3&3D%F;23&+93$&%(3&"#&/3;%(%+3&2,#3/&/"&
+9%+&$".&A%#&(3%)&+93F&3%/,2$N&!93&I,23/&%#)&;%+9/&F./+&-3&/;3A,I,A&+"&$".N&
As mentioned above, the script will run its own jobs after it has started. You will know quality
control is done when you have the following files in your output directory (-o):
(B6\O-#$&K%22&+93&,#I"(F%+,"#&%-".+&+93&Z.%2,+$&A"#+("2L&
4#&<6,)._()Z(OD,%&K$".(&A"F-,#3)&%#)&A23%#3)&/3Z.3#A3/&,#&NI#%&I"(F%+L&
Y".&<,22&%2/"&9%?3&/"F3&I,23/&+9%+&<3(3&H3#3(%+3)&-$&+93&U"-/&/.-F,++3)&-$&+9,/&/A(,;+N&!93$&
,#A2.)30&
^#<_,%&)_(0#10&K+93&/+%#)%()&".+&A%;+.(3)&-$&+93&U"-&/.-F,//,"#L&
^#<_,%&)_(0.)""&K+93&3(("(/&A%;+.(3)&-$&+93&U"-&/.-F,//,"#L&
Picking+OTUs+
What+is+OTU+picking?+
\![&;,AP,#H&,/&9"<&<3&+%P3&".(&dfB&GQ7&/3Z.3#A3/&%#)&%//,H#&+93F&+"&%#&\![&,)3#+,I,3(N&7#&
";3(%+,"#%2&+%D"#"F,A&.#,+&K\![L&,/&%&A2./+3(&"I&/,F,2%(&dfB&/3Z.3#A3&?%(,%#+/N&1%A9&A2./+3(&,/&
F3%#+&+"&(3;(3/3#+&%&+%D"#"F,A&.#,+&"I&-%A+3(,%&K/;3A,3/5&H3#./5&;9$2.FNNL&)3;3#),#H&"#&+93&
/3Z.3#A3&/,F,2%(,+$&+9(3/9"2)N&\![&A2./+3(/&%(3&./.%22$&)3I,#3)&-$&%&ext&,)3#+,+$&+9(3/9"2)&"I&
+93&dfB&H3#3&/3Z.3#A3&?%(,%#+/N&!93(3&%(3&+9(33&F%,#&+$;3/&"I&\![&;,AP,#H&+9%+&<3&A%#&)"N&
De-novo+
&
p G"3/#`+&./3&%&(3I3(3#A3&)%+%-%/3&
p 4%U"(,+$&"I&+93&(3%)/&%(3&A2./+3(3)&
p a3($&/2"<&
p 1(("#3"./&(3%)/&H3+&A2./+3(3)&
p *%##"+&%//,H#&+%D"#"F$&
&
Closed+Reference+
&
p @3I3(3#A3&)%+%-%/3&,/&Z.%2,+$&I,2+3(3)&
p S%/+3(&-3A%./3&$".&A%#&./3&;%(%2232&A"F;.+%+,"#&
p Q"&#3<&\![/&A%#&-3&"-/3(?3)&
p @3I3(3#A3&)%+%-%/3&-,%/&
p [/3/&+93&6(33#63#3/&)%+%-%/3&"I&%22&P#"<#&dfB&
p *%#&%//,H#&+%D"#"F$&
&
Open+Reference+
&
p *"F-,#3/&+93&+<"&%;;("%A93/&
p Q"&)%+%&,/&+9("<#&".+&
p G38#"?"&A2./+3(3)&\![/&A%##"+&-3&%//,H#3)&+%D"#"F$&
&
How+do+we+actually+pick+OTUs?+
Depending on the OTU picker you choose to use, OTU picking can be very computationally heavy.
This means it can require a lot of time and resources. Thanks to the development efforts of
microbiome researchers, we have been able to speed up this process immensely. The OTU
pickers within QIIME are currently not the gold standard, so we will use a different OTU picker
that is installed separately.
For this course we will use a closed reference OTU picker called NINJA, which stands for NINJA
Is Not Just Another aligner. NINJA is currently installed in the shared directory, and our
.bashrc file contains information to have MSI use this program as if it were installed as a
module. The full documentation for NINJA is located here:
https://github.com/GabeAl/NINJA-OPS.
!"&(.#&\![&;,AP,#H5&<3&<,22&./3&+93&F%,#&QJQ:7&A"FF%#)&<,+9&+93&I"22"<,#H&;%(%F3+3(/0&
ninja.py &
-i combined_seqs.fna &
-o ninja_otus &
-m normal &
-p 4 &
-z &
-d 2&
!93&,#;.+&I,23&;%+9&,/&+"&+93&/3Z.3#A3/&$".&<%#+&+"&%2,H#&K8,LN&!93/3&/9".2)&-3&+93&".+;.+&"I&+93&
Z.%2,+$&A"#+("2&<3&),)&3%(2,3(N&!93&".+;.+&),(3A+"($&,/&<93(3&$".&<%#+&$".(&I,#%2&\![&+%-23&+"&
-3&K8"LN&!93&8F&;%(%F3+3(&/3+&+"&#"(F%2&+322/&QJQ:7&+"&(.#&%+&F3),.F&/3#/,+,?,+$&K+"&F%D,F,M3&
+93&/;33)&+"&%AA.(%A$&(%+,"LN&'3&<,22&./3&]&+9(3%)/&K8;L5&%#)&<3&<,22&/3%(A9&-"+9&GQ7&/+(%#)/&K8
MLN&'3&<,22&%2/"&/3+&)3#",/,#H&+"&l&K8)L5&<9,A9&F3%#/&<3&<,22&),/A%()&%#$&/3Z.3#A3/&+9%+&%;;3%(&
23//&+9%#&l&+,F3/N&
!"&(.#&+9,/&A"FF%#)5&<3&F./+&/.-F,+&%&U"-&I,23N&Y".(&U"-&I,23&/9".2)&2""P&2,P3&+9,/0&
#!/bin/bash -l&
#PBS -l nodes=1:ppn=16,mem=2Gb,walltime=6:00:00&
#PBS -m abe&
#PBS -M x500@umn.edu &
#PBS -o job_name_stout&
#PBS -e job_name_stderr&
&
cd /home/biol1961/x500&
&
module load python bowtie2&
&
ninja.py &
-i combined_seqs.fna &
-o ninja_otus &
-m normal &
-p 4 &
-z &
-d 2&
Of course, you have to modify the file to specify YOUR file paths and the outputs to what you
want. Things that cannot change include:
• The first 3 lines. The nodes and ppn must be 1 and 16 for the lab queue
• The module load python bowtie2 line (NINJA requires python and bowtie2 to run)
722&+93&;%(%F3+3(/&I"(&+93&%A+.%2&A"FF%#)&&1(09<)9%--9#,9#,)9-6,)96,90B)9^#<9D6-)5&<,+9&%&/;%A3&
-3+<33#&+93&;%(%F3+3(&23++3(&%#)&?%2.3N&J#&+93&%-"?3&3D%F;23&+93$&%(3&"#&/3;%(%+3&2,#3/&/"&
+9%+&$".&A%#&(3%)&+93F&3%/,2$N&!93&I,23/&%#)&;%+9/&F./+&-3&/;3A,I,A&+"&$".N&
7/&F3#+,"#3)&%-"?35&+93&/A(,;+&<,22&(.#&,+/&"<#&U"-/&%I+3(&,+&9%/&/+%(+3)N&Y".&<,22&P#"<&\![&
;,AP,#H&,/&)"#3&<93#&$".&9%?3&+93&I"22"<,#H&I,23/&,#&$".(&".+;.+&),(3A+"($&K8"L0&
,6,^%_-#$O0>0&K%22&+93&,#I"(F%+,"#&%-".+&+93&%2,H#F3#+L&
,6,^%_#010%<-)O<6#&&K$".(&"+.&+%-23L&
Y".&<,22&%2/"&9%?3&/"F3&I,23/&+9%+&<3(3&H3#3(%+3)&-$&+93&U"-/&/.-F,++3)&-$&+9,/&/A(,;+N&!93$&
,#A2.)30&
^#<_,%&)_(0#10&K+93&/+%#)%()&".+&A%;+.(3)&-$&+93&U"-&/.-F,//,"#L&
^#<_,%&)_(0.)""&K+93&3(("(/&A%;+.(3)&-$&+93&U"-&/.-F,//,"#L&
JI&$".&<%#+&+"&;,AP&"+./&%H%,#5&$".&/9".2)&)323+3&+93/3&I,23/&;(,"(&+"&/.-F,++,#H&%#"+93(&\![&
;,AP,#H&U"-N&
What's+an+OTU+table?+
The output of ninja.py is an OTU table in .biom format. When in .biom format the file is not in a
form we can read easily. We must first convert the table to a .txt file to view it.
Converting+.biom+files+
7&O<6#&&I,23&,/&%&<%$&+"&;%AP%H3&%&2"+&"I&,#I"(F%+,"#&,#&%&<%$&+9%+&)"3/#`+&+%P3&.;&+""&F.A9&
/;%A3N&O3A%./3&%22&+93&,#I"(F%+,"#&,/&A"F;%A+5&,+&F%P3/&+93&I,23&#"+&9.F%#&(3%)%-23N&JI&$".&A%22&
B)%.&"#&%&O<6#&&I,235&+93&".+;.+&<,22&2""P&F"/+2$&2,P3&H,--3(,/9N&'9%+&,/&,F;"(+%#+&,/&+9%+&hJJ41&
%#)&"+93(&F,A("-,"F3&/"I+<%(3/&./3&O<6#&&I,23/&-3A%./3&+93$&%(3&/F%223(&%#)&I%/+&+"&<"(P&<,+9N&
JI&$".&<%#+&+"&;.+&$".(&\![&+%-23&,#&%&9.F%#8(3%)%-23&I"(F%+&$".&9%?3&+"&A"#?3(+&,+&+"&%&+%-8
)32,F,+3)&I,23N&'3&<,22&A"?3(&+9,/&2%+3(N&
Biom+Summaries+
OTU+Tables+in+.biom+format+
We store our OTU tables in two different formats, either as a tab-delimited text file (.txt) or as a
compact, non-human-readable biom format (.biom). When we store the table as a biom file, we
cannot easily look in the file to see how many OTUs or samples there are, but we can access a
summary of the file using some biom commands through QIIME.
Biom+Summary+
'3&A%#&/.FF%(,M3&".(&\![&+%-23&<,+9&+93&<6#&9(1&&%"6])S0%<-)&A"FF%#)&<9,23&./,#H&hJJ41&
,#+3(%A+,?32$0&
ssh lab&
&
cd /home/biol1961/x500&
&
module load qiime/1.8.0&
&
biom summarize-table -i file/path/to/otu_table.biom -o OTU_summary.txt&
The input file would be the file path to YOUR OTU table and the output can be whatever you
would like to name the summary file. This command will make a text file that contains a
summary of your OTU table. We can look at the contents of the .txt file by using nano (the text
editor on MSI).
nano OTU_summary.txt&
&
You can see an example of the summary file above. It tells us:
• The number of samples
• The number of observations (OTUs)
• The minimum, maximum, median, mean and standard deviation of the number of counts per sample
• The taxonomy is stored as the observation metadata
• A list of how many counts are in each sample
Rarefaction+
What+is+Rarefaction?+
In microbiome research, diversity represents the number of OTUs within a data set. This
number can be greatly impacted by different sequencing depths. For example, the deeper
you sequence the more species you will find. This is a problem, especially if you sequence
50,000 reads from one sample and only 100 reads from another sample. You would likely find
more species in the sample that is deeply sequenced (50,000 reads) in comparison to the one
that was shallowly sequenced (100 reads).
O32"<&,/&%#&3D%F;23&<93(3&<3&%(3&H",#H&+"&/3Z.3#A3&"#3&/%F;23&+9(33&+,F3/N&1%A9&A"2"(3)&)"+&
(3;(3/3#+/&%&F,A("-3&%#)&3%A9&A"2"(&(3;(3/3#+/&%&),II3(3#+&/;3A,3/N&!9(".H9&+93&;("A3//&"I&
GQ7&,/"2%+,"#5&dfB&X*@&%F;2,I,A%+,"#5&/3Z.3#A,#H5&Z.%2,+$&+(,FF,#H&%#)&\![&;,AP,#H&<3&A%#&2"/3&
,#I"(F%+,"#&"(&/3Z.3#A3/N&
&
!"&;(3?3#+&%#$&-,%/&<3&F%$&/33&,#&".(&),?3(/,+$&%#%2$/,/&<3&A%#&(%(3I$&".(&)%+%N&7&(%(3I%A+,"#&,/&
%&(%#)"F&A"223A+,"#&"I&/3Z.3#A3/&I("F&%&/%F;235&<,+9&%&/;3A,I,3)&#.F-3(&"I&/3Z.3#A3/&
K)3;+9LN&S"(&3D%F;235&%&(%(3I%A+,"#&<,+9&%&)3;+9&"I&dccc&(3%)/&;3(&/%F;23&,/&%&/,F.2%+,"#&"I&
<9%+&$".(&/3Z.3#A,#H&(3/.2+/&<".2)&2""P&2,P3&,I&$".&/3Z.3#A3)&3D%A+2$&dccc&(3%)/&I("F&3%A9&
/%F;23N&O$&(%(3I$,#H&".(&\![&+%-23&<3&A%#&I%,(2$&F3%/.(3&%2;9%&),?3(/,+$&%A("//&/%F;23/N&
Exploring+Rarefied+Data+with+Alpha+Diversity+
In QIIME, we can first explore our data by looking at alpha diversity across multiple different
sequencing depths. This task is performed using the alpha_rarefaction.py command that
takes your OTU table and makes a directory full of many OTU tables, all of which are repeats of
rarefactions at specific depths. The output of this command allows us to visualize how
measurements in alpha diversity will change across a range of sequence depths per sample.
Once we know how the diversity changes with depth, we can create one final rarefied table to
use for our alpha diversity and beta diversity calculations.
!93&A"FF%#)&,/&(.#&./,#H&+93/3&;%(%F3+3(/0&
alpha_rarefaction.py&
-i input_file_path&
-o output_directory&
-t tree_file_path&
-m metadata_file_path&
The input file path is to the OTU table in .biom format that you want to rarefy (-i). The output
directory is where you want your final results to be (-o). The tree file path is the location of the
Greengenes 97 percent phylogeny tree (-t). The metadata file path is the location of the
mapping file (-m).
The alpha_rarefaction.py command will do multiple things:
(1) Creates multiple rarefied OTU tables at ten evenly spaced depths,
starting at a minimum level of ten sequences and stopping at the median number of
sequences per sample
(2) Runs alpha_diversity.py on each of the rarefied OTU tables using the chao1,
observed_species, and phylogenetic distance metrics
(3) Collates the results for each metric at the various depths into one table per metric, within
the alpha_div_collated/ subdirectory
(4) Plots the different metrics for each category in the metadata file and places those within
the alpha_rarefaction_plots/ subdirectory
(5) Deletes all of the intermediate OTU tables it had generated to do the analysis
(6) Creates a log file and overall rarefaction plot within the main output directory
!"&(.#&+9,/&hJJ41&A"FF%#)5&<3&F./+&/.-F,+&%&U"-N&Y".(&U"-&I,23&/9".2)&2""P&2,P3&+9,/0&
#!/bin/bash -l&
#PBS -l nodes=1:ppn=16,mem=2Gb,walltime=6:00:00&
#PBS -m abe&
#PBS -M x500@umn.edu &
#PBS -o job_name_stout&
#PBS -e job_name_stderr&
&
cd /home/biol1961/x500&
&
module load qiime/1.8.0&
&
alpha_rarefaction.py&
-i file/path/to/otu_table.biom&
-o file/path/to/whatever_you_want&
-t /home/biol1961/shared/97_otus.tre&
-m /home/biol1961/shared/map.txt&
Of course, you have to modify the file to specify YOUR file paths and the outputs to what you
want. Things that cannot change include:
• The first 3 lines. The nodes and ppn must be 1 and 16 for the lab queue
• The module load qiime/1.8.0 line. We must always load QIIME and it must be qiime/1.8.0
• The -t must be to the 97 percent Greengenes tree
722&+93&;%(%F3+3(/&I"(&+93&%A+.%2&A"FF%#)&F./+&-3&%22&"#&"#3&2,#3&,#&+93&U"-&I,235&<,+9&%&/;%A3&
-3+<33#&+93&;%(%F3+3(&23++3(&%#)&?%2.3N&J#&+93&%-"?3&3D%F;23&+93$&%(3&"#&/3;%(%+3&2,#3/&/"&
+9%+&$".&A%#&(3%)&+93F&3%/,2$N&7/&F3#+,"#3)&%-"?35&+93&/A(,;+&<,22&(.#&,+/&"<#&U"-/&%I+3(&,+&9%/&
/+%(+3)N&Y".&<,22&P#"<&alpha_rarefaction.py&,/&)"#3&<93#&$".&9%?3&+93&I"22"<,#H&I,23/&,#&
$".(&".+;.+&),(3A+"($&K8"L0&
alpha_div_collated/&K"#3&+%-23&;3(&F3+(,A&,#&93(3L&
alpha_rarefaction_plots/&K;2"+/&;3(&F3+%)%+%&A"2.F#L&
log_##.txt&K2"H&I,23L&
rarefaction_plots.html&K"?3(%22&;2"+L&
Y".&<,22&%2/"&9%?3&/"F3&I,23/&+9%+&<3(3&H3#3(%+3)&-$&+93&U"-/&/.-F,++3)&-$&+9,/&/A(,;+N&!93$&
,#A2.)30&^#<_,%&)_(0#10&K+93&/+%#)%()&".+&A%;+.(3)&-$&+93&U"-&/.-F,//,"#L&
^#<_,%&)_(0.)""&K+93&3(("(/&A%;+.(3)&-$&+93&U"-&/.-F,//,"#L&
JI&$".&<%#+&+"&(.#&+93&A"FF%#)&%H%,#5&$".&/9".2)&)323+3&+93/3&I,23/&;(,"(&+"&/.-F,++,#H&
%#"+93(&U"-N&
!"&2""P&%+&$".(&;2"+/5&$".&F./+&+(%#/I3(&+93&),06")&alpha_rarefaction.py&".+;.+&I"2)3(&I("F&
4BJ&+"&$".(&A"F;.+3(N&!93&rarefaction_plots.html&I,23&#33)/&"+93(&,#I"(F%+,"#&/.;;2,3)&
<,+9,#&+93&/.-I"2)3(/N&Y".&A%#&F"?3&+9(".H9&%22&"I&+93&;2"+/&-$&/323A+,#H&),II3(3#+&A%+3H"(,3/&
%#)&),?3(/,+$&F3+(,A/N&
&
Creating+a+Rarefied+OTU+Table+
How+do+you+pick+a+depth?+
Y".&<%#+&+"&;,AP&%&)3;+9&+9%+0&
KdL&E33;/&%/&F%#$&/%F;23/&%/&;"//,-23&K,/#z+&+""&9,H9L&
KlL&J/#z+&/"&2"<&+9%+&/%F;23/&%(3#z+&(3;(3/3#+%+,?3&"I&+93&+"+%2&),?3(/,+$&
& &
In the above examples the left plot shows all samples and the right plot shows the mean
number of observed species according to body site. If we chose a depth of 2000 sequences, we
would lose some of the orange samples on the right (saliva). We know this because on the group
plots, the line will stop where at least one sample in that group no longer has that many
sequences. The depth you choose is entirely up to you! But you should be able to justify why
you pick it. Normally we try to pick a depth where the rarefaction curves begin to level off. This
means for each increase in the number of sequences, we are not detecting any (or very few)
new OTUs. In the above example we would probably rarefy at no lower than 1,500.
Creating+your+Rarefied+OTU+Table+
\#A3&$".&9%?3&;,AP3)&%&)3;+9&-%/3)&"#&+93&alpha_rarefaction.py outputs5&$".&%(3&(3%)$&
+"&A(3%+3&%&(%(3I,3)&\![&+%-23N&!"&)"&+9,/5&<3&./3&+93&single_rarefaction.py&A"FF%#)&<,+9&
+93&I"22"<,#H&;%(%F3+3(/0&
single_rarefaction.py&
-i input_file_path&
-o output_file_path&
-d number_of_sequences&
!93&,#;.+&I,23&;%+9&,/&+"&+93&\![&+%-23&,#&N-,"F&I"(F%+&+9%+&$".&<%#+&+"&(%(3I$&K8,LN&!93&".+;.+&
I,23&;%+9&,/&<93(3&$".&<%#+&$".(&I,#%2&(%(3I,3)&\![&+%-23&+"&-3&K8"5&F%P3&/.(3&+93&#%F3&,/&
),II3(3#+&I("F&+93&"(,H,#%2mLN&O"+9&+93&,#;.+&%#)&".+;.+&<,22&-3&N-,"F&I,23/N&!93&2%/+&;%(%F3+3(5&K8
)L&,/&+93&)3;+9&$".&9%?3&A9"/3#&-%/3)&"#&+93&;2"+/&H3#3(%+3)&-$&%2;9%g(%(3I%A+,"#N;$N&
@3F3F-3(5&%22&+93&;%(%F3+3(/&I"(&+93&%A+.%2&A"FF%#)&F./+&-3&%22&"#&"#3&2,#3&,#&+93&U"-&I,235&
<,+9&%&/;%A3&-3+<33#&+93&;%(%F3+3(&23++3(&%#)&?%2.3&%#)&F./+&-3&/;3A,I,A&+"&$".N&
=B)9#10N109#D90B6(94#&&%,.9+6--9<)90B)9Q='90%<-)92#191()9D#"9%-NB%9%,.9<)0%9.6E)"(6029
4%-41-%06#,(O&
Filtering+OTU+Tables+
Why+do+we+Filter+Samples+From+an+OTU+Table?+
S,2+3(,#H&2"<&)3;+9&/%F;23/&I("F&%#&\![&+%-23&A%#&-3&./3)&%/&%#&%2+3(#%+,?3&I"(&(%(3I$,#H&%#&
\![&+%-23N&@%(3I$,#H&(3/.2+/&,#&+%P,#H&"#2$&%&/F%22&I(%A+,"#&"I&+93&"(,H,#%2&)%+%N&J+&A%./3/&%#&
,#A(3%/3&,#&+<"&+$;3/&"I&3(("(0&
!$;3&J&
u&iG3A(3%/3)&/;3A,I,A,+$i&"(&%#&,#A(3%/3)&2,P32,9"")&,#&/%$,#H&+<"&H(".;/&%(3&),II3(3#+&<93#&
+93$&%(3#`+&u&*%./3)&-$&(%(3I,3)&/%F;23/&(3F%,#,#H&"?3(8),/;3(/3)&K%&/F%22&#.F-3(&"I&
/3Z.3#A3/&A"F3&I("F&%&?%(,3+$&"I&/".(A3/L&
!$;3&JJ&
u&i>"//&"I&;"<3(i&"(&i)3A(3%/3)&/3#/,+,?,+$i&+"&)3+3A+&(3%2&),II3(3#A3/&-3+<33#&H(".;/&u&*%./3)&
-$&?%2.%-23&,#I"(F%+,"#&%-".+&),?3(/,+$&-3,#H&+9("<#&".+&
&
>3+`/&),/A.//&(%(3I%A+,"#&-$&2""P,#H&%+&+93&-,"F&/.FF%($&I"(&%#&"(,H,#%2&\![&+%-23&%#)&+93&
(%(3I,3)&?3(/,"#N&
&
Biom:Summary:of:Original:OTU:table:|:Biom:summary:of:rarefied:OTU:table:(2000:seqs):
Q"+,A3&9"<&,#&+93&%-"?3&3D%F;235&+93&I,(/+&/%F;23/&,#&+93&"(,H,#%2&\![&+%-23&%(3&2"<&,#&
/3Z.3#A3&#.F-3(N&\#3&/%F;23&9%/&"#2$&]&/3Z.3#A3/N&!93&2%/+&/%F;23&2,/+3)&9%/&"?3(&x5ccc&
/3Z.3#A3/5&%#)&+9%+&,/&/+,22&2"<&A"F;%(3)&+"&+93&(3/+&"I&+93&/3Z.3#A3/&,#&+93&)%+%/3+&K+93&
F3),%#&,/&dl5dld&/3Z.3#A3/LN&!93&+%-23&"#&+93&(,H9+&,/&+93&/%F3&\![&+%-23&(%(3I,3)&+"&l5ccc&
/3Z.3#A3/N&Q"+,A3&+9%+&+93&/%F;23/&-32"<&lccc&/3Z.3#A3/&%(3&+9("<#&".+5&%#)&+9%+&+93&
(3F%,#,#H&/%F;23/&%(3&/.-/%F;23)&+"&%#&3?3#&)3;+9&"I&l5cccN&!9%+&F3%#/&F"/+&/%F;23/&9%?3&
2"/+&%-".+&dc5ccc&/3Z.3#A3/m&!9%+`/&,/&%&2"+&"I&,#I"(F%+,"#&+"&+9("<&".+m&
'3&/9".2)&#"+35&+9%+&"%")D26,$9#1"9.%0%96(9(06--90B)9$#-.9(0%,.%".9+B),9&)%(1"6,$9%-NB%9%,.9
<)0%9.6E)"(602N&'93#&2""P,#H&I"(&/;3A,I,A&+%D%5&9"<3?3(5&<3&A%#&I,2+3(&2"<8)3;+9&/%F;23/&I("F&
".(&\![&+%-23&%#)&P33;&+93&I.22&)3;+9&"I&/3Z.3#A3/&I"(&+93&(3/+&"I&+93&/%F;23/N&'3&A%#&+93#&
%AA".#+&I"(&),II3(3#A3/&,#&/3Z.3#A,#H&)3;+9&-$&+(%#/I"(F,#H&+93&)%+%&2%+3(N&'3&%AA"F;2,/9&+93&
I,2+3(,#H&"I&2"<8)3;+9&/%F;23/&+9(".H9&+93&filter_samples_from_otu_table.py&A"FF%#)&,#&
hJJ41N&Q"(F%22$5&<3&P33;&/%F;23/&+9%+&9%?3&wd5ccc&/3Z.3#A3/&%#)&+9("<&".+&+93&"+93(/N&Y".&
A%#&A9""/3&+"&H"&9,H93(&"(&2"<3(&)3;3#),#H&"#&+93&/3Z.3#A,#H&(3/.2+/N&
How+do+We+Filter+an+OTU+Table?+
J#&hJJ415&+9,/&+%/P&,/&;3(I"(F3)&"#&$".(&\![&+%-23N&!93&hJJ41&A"FF%#)&
filter_samples_from_otu_table.py&+%P3/&$".(&\![&+%-23&%#)&F%P3/&%&#3<&?3(/,"#&"I&-"+9&
-%/3)&"#&+93&I,2+3(,#H&;%(%F3+3(/&$".&/3+N&
!93&A"FF%#)&,/&(.#&./,#H&+93/3&;%(%F3+3(/0&
filter_samples_from_otu_table.py &
-i input_file_path &
-o output_file_path &
-n number_of_sequences&
!93&,#;.+&I,23&;%+9&,/&+"&+93&"(,H,#%2&\![&+%-23&,#&N-,"F&I"(F%+&+9%+&$".&<%#+&+"&I,2+3(&K-iLN&!93&
".+;.+&I,23&;%+9&,/&<93(3&$".&<%#+&$".(&I,2+3(3)&\![&+%-23&+"&-3&K-oLN&!93&F,#,F.F&#.F-3(&"I&
/3Z.3#A3/&%&/%F;23&F./+&9%?3&+"&(3F%,#&,#&+93&\![&+%-23&,/&/3+&<,+9&+93&2%/+&;%(%F3+3(&K-nLN&
There are other options you can use to filter your OTU table, such as:
-s valid_states
--sample_id_fp path_to_text_file
--negate_sample_id_fp path_to_text_file
-x max_sequence_count
The valid states let you specify a mapping column and values in that column that a sample must
be associated with to remain in the OTU table (-s). This option also needs the mapping file,
which is passed with -m.
For example, if we sampled people from different locations in the Twin Cities and the collection
location was under a header called "Location" in the mapping file. If we wanted to keep only
samples collected from Uptown and Downtown and not St Paul, you could use: -s
Location:Uptown,Downtown. You could also use a sample ID text file, with one sample ID per
line, to list which samples to keep (--sample_id_fp), or which to be removed from the OTU
table (--negate_sample_id_fp). You can also filter using the maximum number of sequences
a sample can have to remain in the OTU table (-x).
S"(&+93&I.22&2,/+&"I&;%(%F3+3(/&%#)&9"<&+"&./3&+93F5&$".&A%#&2""P&%+&+93&A"FF%#)&;%H3&"#&+93&
hJJ41&<3-;%H30&9++;0RRZ,,F3N"(HR/A(,;+/RI,2+3(g/%F;23/gI("Fg"+.g+%-23N9+F2&
!93&filter_samples_from_otu_table.py&A"FF%#)&<,22&A(3%+3&%&#3<&\![&+%-23&,#&-,"F&
I"(F%+&A"#+%,#,#H&"#2$&+93&/%F;23/&+9%+&F33+&+93&I,2+3(,#H&A(,+3(,%N&
!"&(.#&+9,/&hJJ41&A"FF%#)5&<3&A%#&./3&hJJ41&,#+3(%A+,?32$N&
ssh lab
cd /home/biol1961/x500
module load qiime/1.8.0
filter_samples_from_otu_table.py
-i file/path/to/otu_table.biom
-o file/path/to/whatever_you_want.biom
-m /home/biol1961/shared/map.txt
--output_mapping_fp file/path/to/whatever_you_want.txt
-n number_of_sequences
\I&A".(/35&$".&9%?3&+"&F"),I$&+93&I,23&+"&/;3A,I$&Y\[@&I,23&;%+9/&%#)&+93&".+;.+/&+"&<9%+&$".&
<%#+N&S"(&+93&=4X&)%+%/3+&,#&+93&3D%F;23/5&+93&I,23&;%+9&I"(&+93&F%;&<".2)&(3F%,#&+93&/%F3N&
722&+93&;%(%F3+3(/&I"(&+93&%A+.%2&A"FF%#)&F./+&-3&%22&"#&"#3&2,#3&,#&+93&U"-&I,235&<,+9&%&/;%A3&
-3+<33#&+93&;%(%F3+3(&23++3(&%#)&?%2.3N&J#&+93&%-"?3&3D%F;23&+93$&%(3&"#&/3;%(%+3&2,#3/&/"&
+9%+&$".&A%#&(3%)&+93F&3%/,2$N&5)9(B#1-.91()9%9.)N0B9,#9-#+)"90B%,9Waaa9()Z1),4)(O&
Converting+Table+Types+
.biom+to+.txt+
We can convert .biom files to .txt using biom convert on MSI. To do so, QIIME should be
loaded for interactive use. This command will convert the .biom file to a tab-delimited .txt
file. It will take in the OTU table (in .biom format, -i) and will output a new .txt file (-o).
Specifying -b means we are going from .biom to .txt and --header-key specifies it has
taxonomy. Like other biom commands, this command must be run when QIIME has been
loaded in interactive mode. Again, this command should be run all in one line, with the
parameters separated by one space.
ssh lab&
&
cd /home/biol1961/x500&
&
module load qiime/1.8.0&
&
biom convert &
-i table.biom &
-o table_from_biom_w_taxonomy.txt &
-b &
--header-key taxonomy&
This text file of your OTU table will look something like this. The columns in the OTU table are
the samples. The rows are the OTU IDs. The column header for the OTU IDs is always #OTU ID.
In the text file, each column is separated with a "tab". When we open this tab-delimited text file
in Excel, Excel knows to read each tab as a new column. The values in the OTU table are the
counts for that OTU ID in each sample. For example, if we look at the first OTU (189503) we can
see it occurs 34 times in sample A, 19 times in sample B, and so on. Notice that the last column
is not a sample; it is the taxonomy.
&
.txt+to+.biom+
We can convert our OTU tables from .txt to .biom using biom convert on MSI. This might be
important to do after we filter and normalize our tables (if we want to use them with QIIME
again). To do so, QIIME should be loaded for interactive use.
ssh lab&
&
cd /home/biol1961/x500&
&
module load qiime/1.8.0&
&
biom convert &
-i normalized_table.txt &
-o normalized_table.biom &
--table-type "OTU table" &
--header-key taxonomy&
K!9,/&/9".2)&-3&<(,++3#&%22&"#&"#3&2,#3mL&
Alpha+Diversity+
What+is+alpha+diversity?+
72;9%&),?3(/,+$&F3%/.(3/&9"<&F%#$&),II3(3#+&+9,#H/&%(3&<,+9,#&%&;%(+,A.2%(&%(3%&"(&3A"/$/+3F5&
%#)&,/&./.%22$&3D;(3//3)&-$&+93&#.F-3(&"I&/;3A,3/&K,N3N5&/;3A,3/&(,A9#3//L&,#&+9%+&3A"/$/+3FN&J#&
".(&A%/35&+93&3A"/$/+3F&,#&Z.3/+,"#&,/&+93&/%F;23&+$;3&<3&%(3&%#%2$M,#H&K/+""25&/",25&/P,#NLN&J+`/&
,F;"(+%#+&+"&(3F3F-3(&+9%+&%2;9%&),?3(/,+$&,/&+60B6,9%9(%&N-)5&<9,A9&,/&<9%+&F%P3/&,+&),II3(3#+&
+9%#&-3+%&),?3(/,+$5&<9,A9&<3&<,22&+%2P&%-".+&2%+3(&,#&+93&A".(/3N&!93&%F".#+&"I&),?3(/,+$&,#&%#$&
A"FF.#,+$&,/&3D+(3F32$&,F;"(+%#+&,#&)3+3(F,#,#H&3A"2"H,A%2&)$#%F,A/&K3NHN&A"FF.#,+$&
;(").A+,?,+$5&/+%-,2,+$5&%#)&(3/,2,3#A3LN&S"(&9.F%#/5&+93(3&,/&%&H(3%+&)3%2&"I&)%+%&)3F"#/+(%+,#H&
+9%+&+93&%#A3/+(%2&9.F%#&H.+&F,A("-,"F3&,/&F"(3&),?3(/3&+9%#&+93&F")3(#&"#35&%#)&+9%+&+9,/&
2"<3(&),?3(/,+$&,/&9,H92$&A"((32%+3)&<,+9&%&#.F-3(&"I&,F;"(+%#+&),/3%/3/N&!93(3I"(35&%2;9%&
),?3(/,+$&,/&%#&,F;"(+%#+&;93#"+$;3&,#&F,A("-,"F3&(3/3%(A9N&
G,II3(3#+&F3+(,A/&9%?3&-33#&)3?32";3)&+"&A%2A.2%+3&%2;9%&),?3(/,+$N&B"F3&"I&+93/3&,#A2.)30&
?64B,)((M&7&F3%/.(3&"I&+93&#.F-3(&"I&\![/&;(3/3#+&,#&%&/%F;23&
PE),,)((M&="<&F%#$&"I&3%A9&\![&,/&;(3/3#+&,#&%&/%F;23&
!B2-#$),)0649")-%06#,(B6NM&7AA".#+/&I"(&+%D"#"F$&%#)&;9$2"H3#3+,A&(32%+,"#/9,;/&
&
&
Richness:and:evenness:for:one:sample:in:a:microbiome:study.:
&
Example:of:a:phylogenetic:tree.:
What+are+Diversity+Metrics?+
Below are some common alpha diversity metrics used in microbiome research. There are
numerous other metrics available in QIIME, but we don't need to cover all of them for Biology
2002.
Q<()"E).9;N)46)(9
!93&/,F;23/+&),?3(/,+$&,#)3Dv&,+&,/&U./+&+93&#.F-3(&"I&\![/N&
/B%#W9)(06&%0#"9
&
This is commonly used, and is based upon the number of rare OTUs found in a sample. The
problem with this metric is that if a sample contains many singletons (OTUs that happen just
once, usually by sequencing error) the Chao 1 index will estimate greater species richness than
it would for a sample without rare OTUs. This problem is avoided if we first filter the rare OTUs
from our OTU table. In the equation ($S_{Chao1} = S_{obs} + \frac{F_1^2}{2F_2}$), $S_{obs}$ is the
number of species in the sample, $F_1$ is the
number of singletons (i.e., the number of species with only a single occurrence in the sample)
and $F_2$ is the number of doubletons, which is the number of species with exactly two
occurrences in the sample (Colwell, et al. 1994).
;B%,,#,96,.)>9
&
This index accounts for both abundance and evenness of the species present. It assumes all
species are represented in a sample. In the Shannon index ($H = -\sum_{i=1}^{s} p_i \ln p_i$), p is the
proportion (n/N) of individuals of one particular species found (n) divided by the total number
of individuals found (N), ln is the natural log, $\Sigma$ is the sum of the calculations, and s is the
number of species (CISN, 2010).
;6&N(#,96,.)>9
&
The Simpson index is actually a similarity index, so the higher the value the lower the diversity in
the sample. It gives more weight to common or dominant species. In the Simpson index
($D = \sum_{i=1}^{s} p_i^2$), p is the
proportion (n/N) of individuals of one particular species found (n) divided by the total number
of individuals found (N), $\Sigma$ is still the sum of the calculations, and s is the number of species
(CISN, 2010).
!B2-#$),)0649*6(0%,4)9T!*95B#-)9="))V9
&
The phylogenetic distance metric used most often is PD whole tree. It is the sum of all
phylogenetic branches connecting OTUs together within a community. PD is the sum of ED for
each species (i) in the sample ($PD = \sum_i ED(T,i)$). ED is the evolutionary distinctiveness. It is
calculated by the second equation ($ED(T,i) = \sum_{e \in s(T,i,r)} \lambda_e / S_e$), where for species i
in tree (T), ED is the sum over the edges e of length $\lambda_e$ in the set
s(T,i,r) connecting species i to the root (r), and $S_e$ is the number of species that descend from
edge e (Cadotte, et al. 2010). Below is a figure showing the components of a phylogenetic tree
for reference.
&
The:components:of:a:phylogenetic:tree:(Vellend,:et:al.:2011.):
B"5&%/&$".&A%#&/335&3%A9&"#3&"I&+93&),?3(/,+$&F3+(,A/&,/&/2,H9+2$&),II3(3#+5&3%A9&<,+9&,+`/&
%)?%#+%H3/&%#)&),/%)?%#+%H3/N&J#&+3(F/&"I&F3%/.(,#H&(,A9#3//&%#)&3?3##3//5&3%A9&F3+(,A&,/&
/.FF%(,M3)&-32"<N&
Summary of Diversity Metrics

| Metric | Measurement |
| --- | --- |
| Observed Species | Richness |
| Chao1 | Richness & Evenness |
| Shannon | Richness & Evenness |
| Simpson | Richness & Evenness |
| PD Whole Tree | Phylogeny |
How+do+we+calculate+alpha+diversity+in+QIIME?+
J#&hJJ415&<3&A%#&./3&".(&(%(3I,3)&"(&I,2+3(3)&\![&+%-23&+"&A%2A.2%+3&%2;9%&),?3(/,+$N&1%(2,3(&<3&
./3)&%2;9%&),?3(/,+$&F3+(,A/&+"&)3+3(F,#3&%&(3%/"#%-23&(%(3I%A+,"#&)3;+9&"(&%&(3%/"#%-23&
/3Z.3#A,#H&)3;+9&%/&I,2+3(,#H&A.+"IIN&Q"<&<3&<,22&./3&+93&alpha_diversity.py&A"FF%#)&,#&
hJJ41&+"&F%P3&%&I,#%2&%2;9%&),?3(/,+$&A%2A.2%+,"#&I"(&3%A9&/%F;23N&!93&A"FF%#)&,/&(.#&./,#H&
+93/3&;%(%F3+3(/0&
alpha_diversity.py&
-i file/path/to/otu_table.biom&
-o file/path/to/alpha_diversity.txt&
-m metrics,to,use&
-t file/path/to/tree&
The input file path is to the filtered or rarefied OTU table in .biom format (-i). The output file
path is where you want your alpha diversity table to be (-o). The metrics are what you would
like to use as an estimate of diversity, and should be a comma-separated list with no spaces
(-m). The tree file path is to the Greengenes 97% OTU tree (-t). For a full list of the metrics
available and how to spell them, you can type:
alpha_diversity.py -s&
!93&".+;.+&,/0&
Known metrics are: ace, berger_parker_d, brillouin_d, chao1, chao1_ci,
dominance, doubles, enspie, equitability, esty_ci, fisher_alpha, gini_index,
goods_coverage, heip_e, kempton_taylor_q, margalef, mcintosh_d, mcintosh_e,
menhinick, michaelis_menten_fit, observed_otus, observed_species, osd,
simpson_reciprocal, robbins, shannon, simpson, simpson_e, singles, strong,
PD_whole_tree&
S"(&+93&I.22&2,/+&"I&;%(%F3+3(/&%#)&9"<&+"&./3&+93F5&$".&A%#&2""P&%+&+93&A"FF%#)&;%H3&"#&+93&
hJJ41&<3-;%H30&9++;0RRZ,,F3N"(HR/A(,;+/R%2;9%g),?3(/,+$N9+F2&
How do we run alpha_diversity.py?
To run this QIIME command, we can use QIIME interactively. Note: You should be using your
rarefied OTU table!
ssh lab
cd /home/biol1961/x500
module load qiime/1.8.0

alpha_diversity.py
-i file/path/to/otu_table.biom
-o file/path/to/alpha_diversity.txt
-m shannon,simpson,chao1,PD_whole_tree
-t /home/biol1961/shared/97_otus.tree
All the parameters for the alpha_diversity.py command must be on one line, with a
space between the parameter letter and value. In the above example they are on separate lines
so that you can read them easily.
What+does+alpha_diversity.py+give+us?+
The output of the alpha_diversity.py command is a table, where the columns are the different
diversity metrics and the rows are samples. We can then use this table to make alpha diversity
plots, to visualize our findings. We can also test to see if the alpha diversity is significantly
different between and across different sample types.
&
References
Cadotte MW, et al. 2010. Ecology Letters. 13: 96-105.
Colwell, R.K. and Coddington, J.A. 1994. Philosophical Transactions of the Royal Society:
Biological Sciences. 345:101-118.
Community Invasive Species Network (CISN). 2010. How to Calculate Biodiversity.
http://www.protectingusnow.org
Vellend M, et al. 2011. Biological diversity: frontiers in measurement and assessment. Oxford
University Press.
Beta+Diversity+
What+is+Beta+Diversity?+
In his 1972 publication in Taxon, "Evolution and Measurement of Species Diversity", R. H.
Whittaker laid out the terms and concepts for how we think about and define biodiversity. His
idea was that the total species diversity in a landscape (γ or gamma-diversity) (e.g. ALL human
gastrointestinal (GI) tracts) is determined by two different things:

| | | | |
| --- | --- | --- | --- |
| 1) Alpha diversity | the mean species diversity at the habitat level | α | e.g. one person's GI tract |
| 2) Beta diversity | the differentiation among habitats | β | e.g. different people's GI tracts |

The total diversity, gamma, is alpha multiplied by beta: γ = α × β
&
We have already discussed alpha diversity and have compared the average alpha diversity of
samples across body sites. We found that indeed, there are significant differences in alpha
diversity between body sites. Now, we are interested in looking at the difference (the ecological
distance) in the community members between samples (e.g. individuals) and groups of samples
(e.g. body sites).
For example, let's say you are comparing the biological communities of a 20 m² patch of the
Great Barrier Reef (right) and a 20 m² patch of the Amazon rainforest (left).

Both of these habitats have very high alpha (α) diversity. However, despite similarly high alpha
diversity, if you were to compare the composition of these two communities at the macroscopic
level, they are almost completely non-overlapping. Therefore, they would also have a very high
beta diversity (β). This, however, is a very extreme example.
>3+`/&/%$&+9%+&,#/+3%)&"I&A"F;%(,#H&%&/,#H23&;%+A9&"I&A"(%2&(33I&%#)&%&/,#H23&;%+A9&"I&(%,#I"(3/+5&
$".&A"F;%(3&F.2+,;23&;%+A93/&"I&b&),II3(3#+&A"(%2&(33I/&+"&3%A9&"+93(&%#)&F.2+,;23&;%+A93/&"I&b&
),II3(3#+&(%,#I"(3/+/&+"&3%A9&"+93(N&Y".&F,H9+&I,#)&+9%+&+93&%?3(%H3&%2;9%&),?3(/,+$&,/&%-".+&+93&
/%F3&I"(&A"(%2&(33I/&%#)&(%,#I"(3/+/5&-.+&-3+%&),?3(/,+$&,/&/,H#,I,A%#+2$&9,H93(&I"(&(%,#I"(3/+/&+9%#&
I"(&A"(%2&(33I/N&!9%+&<".2)&F3%#&+9%+&),II3(3#+&(%,#I"(3/+/&9%?3&/;3A,3/&+9%+&),II3(&I("F&3%A9&
"+93(N&
J#&%#"+93(&3D%F;235&$".&/3Z.3#A3&+93&6J&+(%A+&F,A("-,"+%&"I&dcc&93%2+9$&%).2+/N&S,I+$&
,#),?,).%2/&9%?3&-33#&+%P,#H&(3H.2%(&2"<&)"/3/&"I&%/;,(,#&I"(&+93&;%/+&Vc&)%$/N&!93&"+93(&9%2I&"I&
/+.)$&/.-U3A+/&9%?3&-33#&+%P,#H&%&;2%A3-"N&Y".&I,#)&+9%+&+93&%2;9%&),?3(/,+$&I"(&+93&+(3%+F3#+&
H(".;&,/&#"+&/,H#,I,A%#+2$&),II3(3#+&I("F&+93&A"#+("2&H(".;N&="<3?3(5&+93&-3+%&),?3(/,+$&I"(&+93&
+(3%+F3#+&H(".;&,/&/,H#,I,A%#+2$&9,H93(N&'9%+&<".2)&+9%+&F3%#~&
What+are+Beta+Diversity+Metrics?+
JI&$".&(3F3F-3(&+93(3&<3(3&F.2+,;23&),?3(/,+$&F3+(,A/&+9%+&<3&./3)&I"(&%2;9%&),?3(/,+$N&
B,F,2%(2$5&+93(3&%(3&F.2+,;23&-3+%&),?3(/,+$&F3+(,A/N&O32"<&<3&<,22&A"?3(&+93&F"/+&<,)32$&./3)&
),/+%#A3&F3+(,A/&I"(&-3+%&),?3(/,+$N&
UniFrac Distance

$$U_{AB} = \frac{\text{unique}}{\text{observed}}$$

This is the most widely used index. The unique fraction metric, or UniFrac, measures the
phylogenetic distance between sets of taxa in a phylogenetic tree. It counts the branch lengths
of the tree that lead to taxa from either one environment or the other, but not both (Lozupone
and Knight, 2005). In the equation, U_AB is the UniFrac distance between samples A and B, where
unique = total unique branch length (cumulative branch lengths that lead to OTUs observed only
in sample A or sample B) and observed = total branch length (cumulative branch lengths that
lead to all OTUs in samples A or B). This metric is sensitive but also puts emphasis on minor
differences in the tree (Fukuyama et al., 2012).
&
Weighted UniFrac Distance
In the above example, the relative abundances of taxa are not taken into consideration (referred
to as unweighted UniFrac distance). There is a second metric, known as weighted UniFrac
distance, that weights each OTU based on its relative abundance. Both metrics are criticized for
giving either too much (unweighted) or too little (weighted) value to rare taxa, but both have
value in showing different aspects of community diversity.
Y"%2S/1"6(9*6((6&6-%"6029
&
Bray-Curtis takes the sum of the differences in OTU abundances over the sum of the total OTU
abundances between samples:

$$BC_{ij} = \frac{\sum_{x} \lvert x_i - x_j \rvert}{\sum_{x} (x_i + x_j)}$$

In the equation, x_i is the abundance of OTU x in sample i, and x_j is the abundance of OTU x
in sample j. If an OTU is absent then its abundance should be recorded as zero. The Bray-Curtis
metric ranges from 0 to 1, where 0 means the two samples have the same composition and 1 means
the two samples do not share any OTUs (Gardener, 2016). This metric does not take relatedness
of the OTUs into consideration (phylogeny).
B"&%/&$".&A%#&/335&3%A9&"#3&"I&+93&),?3(/,+$&F3+(,A/&,/&/2,H9+2$&),II3(3#+5&3%A9&<,+9&,+`/&
%)?%#+%H3/&%#)&),/%)?%#+%H3/N&J#&+3(F/&"I&F3%/.(,#H&%-.#)%#A3&%#)&;9$2"H3#3+,A&),II3(3#A3/5&
3%A9&F3+(,A&,/&/.FF%(,M3)&-32"<N&
Summary of Beta Diversity Metrics

| Metric | Phylogeny? | Abundance |
| ------------ | ------------ | ------------ |
| Unweighted UniFrac | yes | no |
| Weighted UniFrac | yes | yes |
| Bray-Curtis | no | yes |
How+Do+We+Calculate+Beta+Diversity+in+QIIME?+
'3&<,22&./3&+93&beta_diversity.py&A"FF%#)&,#&hJJ41&+"&A%2A.2%+3&-3+%&),?3(/,+$&F3+(,A/&
-3+<33#&/%F;23/&%#)&H(".;/N&!9,/&A"FF%#)&<,22&(3+.(#&%&F%+(,D&"I&+93&),/+%#A3/&"I&%22&/%F;23/&
+"&%22&"+93(&/%F;23/N&!9,/&A%#&-3&?,/.%2,M3)&%/&%&H(%;9&"I&;",#+/5&%&#3+<"(P5&"(&%#$&"+93(&A(3%+,?3&
F3+9")&$".&A%#&A"F3&.;&<,+9N&'3&/9".2)&#"+3&+9%+&/3Z.3#A,#H&)3;+9&A%#&9%?3&%#&3II3A+&"#&
-3+%&),?3(/,+$&%#%2$/,/5&U./+&%/&,+&)"3/&"#&%2;9%&),?3(/,+$N&&
beta_diversity_through_plots.py&
-i file/path/to/otu_table.biom&
-o file/path/to/beta_diversity&
-m file/path/to/mapping_file.txt&
-t file/path/to/tree&
-p file/path/to/parameters_file.txt&
-a run_parallel&
-O job_to_run&
-e sequences_per_sample&
&
The input file path is to the filtered or rarefied OTU table in .biom format (-i). The output file
path is where you want your beta diversity output to be (-o). The mapping file path is the
location of the mapping file (-m). The tree file path is to the Greengenes 97% OTU tree (-t).
The metrics you would like to use as estimates of beta diversity are supplied in the parameters
file (-p). To run this in parallel (-a), we must specify the number of jobs (-O, the lab queue
max is 6). If we didn't rarefy our OTU table, but want an even depth for all the samples, we
could also specify the depth (-e). For the full list of parameters and how to use them, you can
look at the script page on the QIIME webpage:
http://qiime.org/scripts/beta_diversity_through_plots.html
The beta_diversity_through_plots.py command will do multiple things:
1. Create jobs within the jobs/ folder it creates, as well as output (.o##) and error files
(.e##), and a pbs_nodefile.txt file (just like the OTU picking script)
2. Randomly subsample otu_table.biom to an even number of sequences per sample
(specified with -e)
3. Run beta_diversity.py for the diversity metrics wanted (specified with the parameters
file via -p) and create distance matrices in the main output directory (metric_dm.txt)
4. Perform a principal coordinates analysis on the result of Step 3 in the main output
directory (metric_pc.txt)
5. Generate 2D and 3D plots for all mapping fields in the metric_emperor_pcoa_plot/
subdirectories
6. Delete all of the intermediate files generated to do the analysis
7. Create a log file and overall rarefaction plot within the main output directory
!"&(.#&+9,/&hJJ41&A"FF%#)5&<3&A%#&./3&hJJ41&,#+3(%A+,?32$&"#&4BJ0&&
ssh lab&
module load qiime/1.8.0&
&
beta_diversity_through_plots.py&
-i file/path/to/otu_table.biom&
-o file/path/to/whatever_you_want&
-t /home/biol1961/shared/97_otus.tree&
-m /home/biol1961/shared/map.txt&
-p /home/biol1961/shared/parameters.txt&
&
\I&A".(/35&$".&9%?3&+"&F"),I$&+93&I,23&+"&/;3A,I$&Y\[@&I,23&;%+9/&%#)&+93&".+;.+/&+"&<9%+&$".&
<%#+&%#)&,+&/9".2)&%22&-3&"#&"#3&2,#3N&&
722&+93&;%(%F3+3(/&I"(&+93&%A+.%2&A"FF%#)&F./+&-3&%22&"#&"#3&2,#3&,#&+93&U"-&I,235&<,+9&%&/;%A3&
-3+<33#&+93&;%(%F3+3(&23++3(&%#)&?%2.3N&J#&+93&%-"?3&3D%F;23&+93$&%(3&"#&/3;%(%+3&2,#3/&/"&
+9%+&$".&A%#&(3%)&+93F&3%/,2$N&
7/&F3#+,"#3)&%-"?35&+93&A"FF%#)&<,22&(.#&,+/&"<#&U"-/&%I+3(&,+&9%/&/+%(+3)N&Y".&<,22&P#"<&
beta_diversity_through_plots.py&,/&)"#3&<93#&$".&9%?3&+93&I"22"<,#H&I,23/&,#&$".(&".+;.+&
),(3A+"($&K-oL0&
&
metric_pc.txt&K"#3&+%-23&;3(&F3+(,A5&V&+"+%2L&
metric_dm.txt&K"#3&+%-23&;3(&F3+(,A5&V&+"+%2L&
metric_emperor_pcoa_plot&R&K"#3&;3(&F3+(,A5&V&+"+%2L&
log_##.txt&K2"H&I,23L&
&
!"&2""P&%+&$".(&;2"+/5&$".&F./+&+(%#/I3(&+93&3#+,(3&;2"+&I"2)3(&I("F&4BJ&+"&$".(&A"F;.+3(N&!93&
;2"+&I,23&#33)/&"+93(&,#I"(F%+,"#&/.;;2,3)&<,+9,#&+93&/.-I"2)3(/N&
Manipulating+3D+Plots+
Once you have moved the entire plot folder for your metric of choice to your computer, you
can click on the .html file within the directory to load the plot. There are many parameters
about the plot you can change.

| | |
| ------------------ | ---------------------------- |
| Colors | Change what covariate to color by |
| Visibility | Make some samples more transparent |
| Scaling | Make some samples larger or smaller |
| Labels | Add sample labels |
| Axes | Change which principal coordinates are plotted |
| Options | Change background/axes colors and save as image |
References
Fukuyama J, et al. 2012. Pacific Symposium on Biocomputing. 2012:213-24.
Gardener M. 2016. DataAnalytics.org.uk.
(http://www.dataanalytics.org.uk/Publications/Writers%20Bloc/Distance%20metrics.htm).
Lozupone C and Knight R. 2005. Applied and Environmental Microbiology. 71:8228-8235.
Ordination
When we want to look at high-dimensional data, one way to easily visualize similarities and
differences is ordination. The types of ordination plots we will learn about and generate are
Principal Component Analysis (PCA) and Principal Coordinate Analysis (PCoA).
What+is+PCA+and+PCoA?+
J+&,/&%&<%$&"I&,)3#+,I$,#H&;%++3(#/&,#&)%+%&%#)&3D;(3//,#H&)%+%&,#&/.A9&%&<%$&%/&+"&9,H92,H9+&+93,(&
/,F,2%(,+,3/&%#)&),II3(3#A3/N&B,#A3&".(&)%+%&A%#&-3&"I&9,H9&),F3#/,"#/5&I,#),#H&+93&;%++3(#/&A%#&
-3&9%()&%#)&+9,/&,/&<93(3&X*7&%#)&X*"7&%(3&;"<3(I.2&+""2/&I"(&%#%2$M,#H&)%+%N&!93&"+93(&F%,#&
%)?%#+%H3&"I&X*7RX*"7&,/&+9%+&"#A3&$".&9%?3&I".#)&+93/3&;%++3(#/&,#&+93&)%+%&$".&A%#&
A"F;(3//&+93&)%+%&-$&(3).A,#H&+93&#.F-3(&"I&),F3#/,"#/&%#)&?,/.%2,M3&,+N&
This concept of dimension reduction can be very tricky to grasp. Understanding all the math
behind PCA and PCoA is out of the scope of this class. We will, however, try to understand how
the data is reduced, what we are actually plotting, and how to accomplish this in R.
Below are some tutorials to help us understand PCA.
1. Please read the following website:
https://georgemdallas.wordpress.com/2013/10/30/principal-component-analysis-4-dummies-eigenvectors-eigenvalues-and-dimension-reduction
2. Please read the following website:
http://setosa.io/ev/principal-component-analysis
PCA vs PCoA
From the websites listed above we have learned about PCA. So what's PCoA? PCoA is similar to
PCA; however, PCoA can handle distances generated from any similarity or dissimilarity
measure, such as Bray–Curtis and both weighted and unweighted UniFrac metrics. PCoA can
also handle quantitative, semi-quantitative, qualitative, and mixed variables.
B,F,2%(&+"&X*75&X*"7&;(").A3/&%&/3+&"I&.#A"((32%+3)&%D3/&+"&/.FF%(,M3&+93&?%(,%-,2,+$&,#&+93&
)%+%&/3+N&1%A9&%D,/&9%/&%#&3,H3#?%2.3&<9"/3&F%H#,+.)3&,#),A%+3/&+93&%F".#+&"I&?%(,%+,"#&
A%;+.(3)&,#&+9%+&%D,/N&!93&;(";"(+,"#&"I&%&H,?3#&3,H3#?%2.3&+"&+93&/.F&"I&%22&3,H3#?%2.3/&(3?3%2/&
+93&(32%+,?3&`,F;"(+%#A3`&"I&3%A9&%D,/N&7&/.AA3//I.2&X*"7&<,22&H3#3(%+3&%&I3<&Kl8VL&%D3/&<,+9&
(32%+,?32$&2%(H3&3,H3#?%2.3/5&A%;+.(,#H&F"/+&"I&+93&?%(,%+,"#&,#&+93&,#;.+&)%+%5&<,+9&%22&"+93(&
%D3/&9%?,#H&/F%22&3,H3#?%2.3/N&
J#+3(;(3+%+,"#&"I&%&X*"7&;2"+&,/&/+(%,H9+I"(<%()0&"-U3A+/&A2"/3(&+"&"#3&%#"+93(&%(3&F"(3&/,F,2%(&
+9%#&+9"/3&I.(+93(&%<%$N&B,F,2%(,+$&"(&),//,F,2%(,+$&,/&)3I,#3)&-$&+93&F3%/.(3&./3)&,#&+93&
A"#/+(.A+,"#&"I&+93&K),/L/,F,2%(,+$&F%+(,D&./3)&%/&,#;.+N&
X*"7&A%#&9%#)23&%&<,)3&(%#H3&"I&)%+%5&-.+&+93&"(,H,#%2&?%(,%-23/&A%##"+&-3&(3A"?3(3)N&!9,/&,/&
-3A%./3&X*"7&+%P3/&%&F%+(,D&)3(,?3)&I("F&+93&"(,H,#%2&)%+%&%/&,#;.+&%#)&#"+&+93&"(,H,#%2&
?%(,%-23/&+93F/32?3/N&
!93&-3+%g),?3(/,+$g+9(".H9g;2"+/N;$&A"FF%#)&H,?3/&./&X*"7&;2"+/&"I&".(&)%+%N&>%+3(&,#&+93&
A".(/3&<3&<,22&%2/"&23%(#&9"<&+"&H3#3(%+3&+93/3&;2"+/&".(/32?3/&,#&@N&
Summarizing+Taxa+
What+are+Taxa+Summaries?+
Summarizing taxa is a way to visualize which taxa are found in our samples. When we
summarize taxa we can use the various levels of taxonomy. The following levels are those
denoted by Greengenes for taxonomy.

| Level | Taxonomy | Example |
| ------ | ------------------ | ------------------------- |
| 1 | Kingdom | Bacteria |
| 2 | Phylum | Actinobacteria |
| 3 | Class | Actinobacteria |
| 4 | Order | Actinomycetales |
| 5 | Family | Streptomycetaceae |
| 6 | Genus | Streptomyces |
| 7 | Species | mirabilis |
&
J#&hJJ415&<3&A%#&./3&23?32/&l8f&+"&/.FF%(,M3&+%D%N&'3&A%#`+&./3&23?32&d&-3A%./3&+9%+&<".2)&
(3/.2+&,#&#"&/.FF%($&K%22&"I&".(&\![/&%(3&-%A+3(,%LN&'3&%2/"&A%##"+&./3&23?32&x5&-3A%./3&./,#H&
ext&,)3#+,+$&"I&%&dfB&H3#3&A%##"+&(3/"2?3&/;3A,3/&I("F&"#3&%#"+93(&KI"(&+93&F"/+&;%(+LN&'3&
/.FF%(,M3&+%D%&<,+9&+93&summarize_taxa_through_plots.py&A"FF%#)&,#&hJJ41N&
How+do+we+actually+summarize+taxa?+
In QIIME, this task is performed on your rarefied or filtered OTU table. It must be a COUNT
table, not relative abundance. The QIIME command summarize_taxa_through_plots.py
takes your OTU table and collapses the table into the various taxonomic levels. It will then plot
the taxa summaries for us.
!93&A"FF%#)&,/&(.#&./,#H&+93/3&;%(%F3+3(/0&
summarize_taxa_through_plots.py&
-i file/path/to/otu_table.biom&
-o file/path/to/summary_output&
-m file/path/to/mapping_file.txt&
-c category_to_use&&
The input file path is to the filtered or rarefied OTU table in .biom format (-i). The output file
path is where you want your taxa summary directory to be (-o). The mapping file path is the
location of the mapping file (-m). The category you would like to use for the summary must be
a column header in the mapping file (-c). If you leave this parameter out, QIIME will make the
summaries and plots using the entire OTU table.
S"(&+93&I.22&2,/+&"I&;%(%F3+3(/&%#)&9"<&+"&./3&+93F5&$".&A%#&2""P&%+&+93&A"FF%#)&;%H3&"#&+93&
hJJ41&<3-;%H30&9++;0RRZ,,F3N"(HR/A(,;+/R/.FF%(,M3g+%D%N9+F2&
The summarize_taxa_through_plots.py&A"FF%#)&<,220&
dN *(3%+3&%#&".+;.+&),(3A+"($&#%F3)&<9%+3?3(&$".&/;3A,I,3)&I"(&-o&
lN *(3%+3&\![&+%-23/&A"22%;/3)&%+&3%A9&+%D"#"F,A&23?32&Kl8fL&<,+9&/%F;23/&H(".;3)&%AA"(),#H&
+"&$".(&-c&;%(%F3+3(&
VN *(3%+3&+%D%&/.FF%($&;2"+/&,#&%&/.-),(3A+"($&A%223)&taxa_summary_plots&
O32"<&,/&%#&3D%F;23&"I&+93&A"#+3#+/&"I&+93&".+;.+&),(3A+"($N&!93&+%D%&/.FF%($&<%/&;(").A3)&
./,#H&+93&A%+3H"($&`/3D`&I("F&+93&F%;;,#H&I,23N&JI&$".&<%#+&+"&2""P&%+&+93&+%D%&/.FF%($&;2"+/5&
$".&F./+&F"?3&+9%+&3#+,(3&/.-),(3A+"($&+"&$".(&;3(/"#%2&A"F;.+3(&+"&?,3<&+93&9+F2&;2"+/N&
&
!"&(.#&+9,/&hJJ41&A"FF%#)5&<3&F./+&/.-F,+&%&U"-&I,23N&Y".(&U"-&I,23&/9".2)&2""P&2,P3&+9,/0&
#!/bin/bash -l
#PBS -l nodes=1:ppn=16,mem=2Gb,walltime=6:00:00
#PBS -m abe
#PBS -M x500@umn.edu
#PBS -o job_name_stdout
#PBS -e job_name_stderr
cd /home/biol1961/x500
module load qiime/1.8.0
summarize_taxa_through_plots.py
-i file/path/to/otu_table.biom
-o file/path/to/whatever_you_want
-m file/path/to/mapping_file.txt
-c category_to_use
722&+93&;%(%F3+3(/&I"(&+93&%A+.%2&A"FF%#)&F./+&-3&%22&"#&"#3&2,#3&,#&+93&U"-&I,235&<,+9&%&/;%A3&
-3+<33#&+93&;%(%F3+3(&23++3(&%#)&?%2.3N&J#&+93&%-"?3&3D%F;23&+93$&%(3&"#&/3;%(%+3&2,#3/&/"&
+9%+&$".&A%#&(3%)&+93F&3%/,2$N&
O32"<&,/&%#&3D%F;23&"I&<9%+&+93&+%D%&/.FF%($&;2"+/&I("F&hJJ41&2""P&2,P3N&!9,/&3D%F;23&,/&./,#H&
+93&A%+3H"($&`/3D`&I("F&+93&F%;;,#H&I,23N&!93&;2"+/&;(").A3)&-$&hJJ41&%(3&#"+&;(3++$&"(&3%/$&+"&
(3%)N&!"&F%P3&3%/,3(&+"&,#+3(;(3+&+%D%&/.FF%($&;2"+/5&<3&A%#&./3&@N&
&
Example:taxa:summary:from:QIIME:
Plotting and Statistics
"There is no statistical tool that is as powerful as a well chosen graph"
— Chambers et al. 1983
'93#&<3&-3H,#&+"&%#%2$M3&".(&)%+%&,+&,/&,F;"(+%#+&+"&-3&%-23&+"&?,/.%2,M3&".(&"-/3(?%+,"#/N&'9$&
,/&+9,/~&
p X2"+/&%(3&F"(3&3II3A+,?3&,#&A(3%+,#H&,#+3(3/+&%#)&,#&%;;3%2,#H&+93&%++3#+,"#&"I&"+93(/&
p a,/.%2&(32%+,"#/9,;/&%(3&F"(3&3%/,2$&H(%/;3)&%#)&(3F3F-3(3)&
p X2"+/&/%?3&+,F35&/,#A3&+(3#)/&%#)&),II3(3#A3/&A%#&-3&?,/.%2,M3)&%+&%&H2%#A3&
p X2"+/&A%#&-(,#H&".+&9,))3#&+(3#)/&%#)&(32%+,"#/9,;/&%#)&%,)&,#&%#%2$+,A%2&+9,#P,#H&
Variables+and+Data+Types+
Before we try to visualize our data we need to identify our variable and data types. This list
is not exhaustive, but includes the main characteristics we need to think about. When we talk
about our data, usually the dependent variable will be a measurement of the OTUs or taxa (for
example, alpha diversity measurements) and the independent variable will be something from
our mapping file. Sometimes we will call the independent variable in the mapping file a
covariate, which is a variable that might be predictive of the outcome of the study.
K%"6%<-)(9
*)N),.),09K%"6%<-)(&
%(3&<9%+&<3&F3%/.(3)&,#&+93&3D;3(,F3#+&%#)&<9%+&<3(3&%II3A+3)&).(,#H&+93&3D;3(,F3#+N&!93&
)3;3#)3#+&?%(,%-23&(3/;"#)/&+"&+93&,#)3;3#)3#+&?%(,%-23N&Y".&A%##"+&9%?3&%&)3;3#)3#+&
?%(,%-23&<,+9".+&%#&,#)3;3#)3#+&?%(,%-23N&\#&%&H(%;95&+9,/&,/&+93&$&?%(,%-23N&
F,.)N),.),09K%"6%<-)(&
%(3&+93&?%(,%-23/&<3&9%?3&A"#+("2&"?3(5&<9%+&<3&A%#&A9""/3&%#)&F%#,;.2%+3N&!93$&%(3&./.%22$&
<9%+&<3&+9,#P&<,22&%II3A+&+93&)3;3#)3#+&?%(,%-23N&J#&/"F3&A%/3/5&<3&F%$&#"+&-3&%-23&+"&
F%#,;.2%+3&+93&,#)3;3#)3#+&?%(,%-23N&J+&F%$&-3&/"F3+9,#H&"-/3(?%+,"#%2&+9%+&,/&%2(3%)$&+93(3&
%#)&,/&I,D3)&K/3D5&),/3%/3&/+%+./5&A"2"(LN&\#&%&H(%;95&+9,/&,/&+93&D&?%(,%-23N&
*%0%9=2N)(9
[1%-60%06E)&
)%+%&,/&)3/A(,;+,?35&,+&,/&"-/3(?3)&%#)&#"+&F3%/.(3)N&J+&,/&"I+3#&A%+3H"(,A%2&KA"2"(5&/F3225&+%/+3LN&
h.%#+,+%+,?3&)%+%&,/&#.F3(,A&%#)&A%#&-3&A".#+3)&"(&F3%/.(3)&K23#H+95&93,H9+5&?"2.F35&<3,H9+LN&
*6(4")0)&
)%+%&A%#&"#2$&+%P3&"#&%&I,#,+3&#.F-3(&"I&?%2.3/5&%#)&,/&A".#+3)N&722&Z.%2,+%+,?3&?%(,%-23/&%(3&
),/A(3+3N&B"F3&Z.%#+,+%+,?3&?%(,%-23/&%(3&),/A(3+35&/.A9&%/&),/3%/3&/A"(3&(%+3)&%/&d5l5V5]5&"(&)%$&
/%F;23)&,I&;3";23&<3(3&"#2$&/%F;23)&"#&%&/;3A,I,A&I,#,+3&#.F-3(&"I&)%$/&K)%$&d&%#)&db&"#2$LN&
/#,06,1#1(&
)%+%&A%#&+%P3&"#&%#$&?%2.3&,#&%&A3(+%,#&(%#H3N&Q"&F3%/.(3)&?%(,%-23&,/&+(.2$&A"#+,#."./5&
9"<3?3(5&),/A(3+3&?%(,%-23/&F3%/.(3)&<,+9&3#".H9&;(3A,/,"#&A%#&"I+3#&-3&A"#/,)3(3)&
A"#+,#."./&I"(&;(%A+,A%2&;.(;"/3&K2,P3&%H3&F3%/.(3)&;3(&)%$5&"(&<3,H9+LN&
Types+of+Plots+
!93(3&%(3&#.F3("./&+$;3/&"I&;2"+/&<3&A%#&./3N&=3(3&%(3&%&I3<&?3($&A"FF"#&+$;3/&"I&;2"+/5&%#)&
%&-(,3I&3D;2%#%+,"#&%/&+"&<9%+&+$;3&"I&)%+%&+93$&./35&<9%+&+93$&),/;2%$5&%#)&<93#&<3&/9".2)&
./3&+93FN&
!6)9/B%"09
X,3&A9%(+/&%(3&./3)&<,+9&),/A(3+3&,#)3;3#)3#+&?%(,%-23/N&X,3&A9%(+/&%(3&-3/+&+"&./3&<93#&$".&%(3&
+($,#H&+"&A"F;%(3&;%(+/&"I&%&<9"23&K;3(A3#+%H3&"(&;(";"(+,"#%2&)%+%LN&X,3&A9%(+/&/9".2)&-3&./3)&
I"(&),/;2%$,#H&)%+%&<,+9&#"&F"(3&+9%#&f&A%+3H"(,3/N&!93$&)"&#"+&/9"<&A9%#H3/&"?3(&+,F3N&!93$&
%(3&#"+&./3)&"I+3#&,#&/A,3#+,I,A&(3/3%(A9N&
Y%"9/B%"09
O%(&H(%;9/&%(3&./3)&<,+9&),/A(3+3&,#)3;3#)3#+&?%(,%-23/N&O%(&H(%;9/&A%#&-3&9"(,M"#+%2&KD&%D,/&"#&
/,)3L&"(&?3(+,A%2&KD&%D,/&,/&"#&+93&-"++"FLN&!93&93,H9+&"I&3%A9&-%(&K)3;3#)3#+&?%(,%-235&$&
?%(,%-23L&%(3&/A%23)&%AA"(),#H&+"&+93,(&?%2.3/&%#)&+93&-%(/&A%#&-3&A"F;%(3)&+"&3%A9&"+93(N&O%(&
H(%;9/&9%?3&%&/;%A3&-3+<33#&3%A9&-%(N&B+%AP3)&-%(&A9%(+/&A%#&-3&./3)&+"&A"F;%(3&"?3(%22&
Z.%#+,+,3/&%A("//&,+3F/&<9,23&,22./+(%+,#H&+93&A"#+(,-.+,"#&"I&3%A9&A%+3H"($&+"&+93&+"+%2N&
H6(0#$"%&9
=,/+"H(%F/&%(3&./3)&<,+9&A"#+,#."./&,#)3;3#)3#+&?%(,%-23/N&=,/+"H(%F/&A%#&-3&9"(,M"#+%2&KD&
%D,/&"#&/,)3L&"(&?3(+,A%2&KD&%D,/&,/&"#&+93&-"++"FLN&!93&93,H9+&"I&3%A9&-%(&K)3;3#)3#+&?%(,%-235&$&
?%(,%-23L&%(3&/A%23)&%AA"(),#H&+"&+93,(&?%2.3/&%#)&+93&-%(/&A%#&-3&A"F;%(3)&+"&3%A9&"+93(N&
=,/+"H(%F/&)"&#"+&9%?3&%&/;%A3&-3+<33#&3%A9&-%(N&
&
Figure'1.'Examples'of'bar'charts'and'a'pie'chart'encoding'the'same'data.:(a):Values:in:
different:categories:are:difficult:to:compare:in:pie:charts.:(b):Stacked:bar:charts:enable:
comparison:of:overall:values:across:items.:(c):Layered:bar:charts:support:comparison:of:values:
within:categories.:(d):Grouped:histograms:allow:comparison:of:values:across:categories:(Streit:
and:Gehlenborg,:2014).:
Y#>N-#09
O"D&;2"+/&%(3&./3)&<,+9&),/A(3+3&,#)3;3#)3#+&?%(,%-23/N&O"D&;2"+/&A%#&-3&9"(,M"#+%2&"(&?3(+,A%2N&
O"D&;2"+/&/9"<&+93&I.22&(%#H3&"I&?%(,%+,"#&KI("F&F,#&+"&F%DL5&+93&2,P32$&(%#H3&"I&?%(,%+,"#&K+93&
,#+3(Z.%(+,23&(%#H35&Jh@L5&%&+$;,A%2&?%2.3&K+93&F3),%#L&%#)&".+2,3(/&K?%2.3/&V&+,F3/&+93&Jh@LN&
!93$&;("?,)3&F"(3&,#I"(F%+,"#&+9%#&%&-%(&A9%(+N&
&
Figure 2. A comparison of bar graphs and box plots. (a) Bar chart showing sample means with
standard-deviation error bars. (b) Box plot with whiskers extending to ±1.5 times the
interquartile range. (c) Distributions of the different data sets. (Streit and Gehlenborg, 2014).
;4%00)"9!-#09
BA%++3(&;2"+/&%(3&./3)&<93#&-"+9&+93&,#)3;3#)3#+&%#)&)3;3#)3#+&?%(,%-23/&%(3&Z.%#+,+%+,?3N&
!93$&/9"<&9"<&F.A9&"#3&?%(,%-23&,/&%II3A+3)&-$&%#"+93(5&%2/"&A%223)&+93,(&A"((32%+,"#N&!93&
A2"/3(&+93&)%+%&;",#+/&A"F3&<93#&;2"++3)&+"&F%P,#H&%&/+(%,H9+&2,#35&+93&9,H93(&+93&A"((32%+,"#&
-3+<33#&+93&+<"&?%(,%-23/5&"(&+93&/+("#H3(&+93&(32%+,"#/9,;N&BA%++3(&;2"+/&A%#&%2/"&932;&./&/33&
)%+%&+9%+&A2./+3(&+"H3+93(&,#&A3(+%,#&%(3%/&"I&+93&/A%++3(&;2"+N&
&
Figure 3. An example scatter plot of the percent of the microbiome that is H. pylori and
obesity (Lender et al, 2014).
A6,)9g"%NB9
>,#3&H(%;9/&%(3&./3)&<93#&-"+9&+93&,#)3;3#)3#+&%#)&)3;3#)3#+&?%(,%-23/&%(3&Z.%#+,+%+,?3N&
>,#3&H(%;9/&%(3&2,P3&/A%++3(&;2"+/&3DA3;+&%&2,#3&,/&A(3%+3)&A"##3A+,#H&3%A9&)%+%&;",#+&+"H3+93(N&
!9,/&3F;9%/,M3/&2"A%2&A9%#H3/&I("F&"#3&;",#+&+"&+93&#3D+N&[#2,P3&/A%++3(&;2"+/5&2,#3&H(%;9/&)"&
#"+&./.%22$&932;&./&)3+3A+&A"((32%+,"#/5&%/&+93&2,#3&3F;9%/,M3/&;",#+8+"8;",#+&A9%#H3/N&
&
Figure 4. An example of line graphs. These graphs emphasize changes in specific bacterial
taxa with and without probiotic supplementation (Rutten et al, 2015).
Statistical Analysis
"Humans aren't too good at discerning subtle patterns that are really there, but equally so at
imagining them when they are altogether absent."
— Everitt and Hothorn, 2010
Summary Statistics
Mode - Data value that occurs most frequently
Median - Data value that occurs at the precise middle of all the (ordered) data points
Mean - Numerical average of all the data points
Variance - Measure of the spread of the data; it is the average of the squared differences
from the mean
Standard Deviation - Square root of the variance
Interquartile Range - Where the 'middle' fifty percent of the data are located
Distribution - Listing or function showing all the possible values of the data and how often
they occur. Includes normal, skewed, uniform, Poisson and others (See Figure 5)
&
Figure'5.'Examples'of'various'distributions'with'different'standard'deviations.:
Statistical+Tests+
!%"%&)0"649;0%06(064(9
X%(%F3+(,A&%#%2$/3/&%(3&+93&"2)3/+&%#)&F"/+&A"FF"#2$&./3)&+$;3&"I&%#%2$/,/N&'3&<,22&A"?3(&+93&
+9(33&F"/+&A"FF"#0&A"((32%+,"#5&+8+3/+5&%#)&%#%2$/,/&"I&?%(,%#A3N&722&;%(%F3+(,A&/+%+,/+,A/&9%?3&
+9(33&A"FF"#&%//.F;+,"#/&+9%+&F./+&-3&F3+&-3I"(3&;("A33),#HN&J#&S,H.(3&b5&+93&+";&3D%F;23&
A".2)&-3&+3/+3)&<,+9&%&;%(%F3+(,A&+3/+N&
1. All observations are independent of other observations (product of experimental design, no
test needed).
2. The data are normally distributed (easily tested by examining the distribution).
3. The variances in the different treatment groups are the same (requires a test, such as the
F-Max Test).
!"#$%&"'()"*+%(")
This analysis is used when you are comparing two different samples. A Student's t-test will
report a t-statistic and a probability value (p-value). If the p-value is less than our alpha
(usually 0.05) we reject our null hypothesis that there is no significant difference
between the groups.
,&-./(0()12)3-40-&5%)6,783,9)
7#%2$/,/&"I&?%(,%#A3&,/&./3)&+"&)3+3(F,#3&,I&),II3(3#A3/&3D,/+&-3+<33#&F"(3&+9%#&+<"&+(3%+F3#+&
H(".;/N&!93&%//.F;+,"#/&"I&7Q\a7&%(3&,)3#+,A%2&+"&+93&+8+3/+&%#)&+93&A%2A.2%+3)&/+%+,/+,A&,/&
A%223)&%#&S8?%2.35&<,+9&%&A"((3/;"#),#H&;8?%2.3N&7/&<,+9&+93&+8+3/+5&,I&".(&;("-%-,2,+$&?%2.3&,/&23//&
+9%#&cNcb&<3&(3U3A+&".(&#.22&9$;"+93/,/&K,#&+9,/&A%/3&+9%+&+93(3&,/&#"&),II3(3#A3&%F"#H&+93&
+(3%+F3#+&H(".;/LN&!9,/&;8?%2.3&"#2$&+322/&./&,I&+93(3&%(3&/,H#,I,A%#+&),II3(3#A3/&%F"#H&".(&
H(".;/N&J+&)"3/&#"+&+322&./&<93(3&+93/3&),II3(3#A3/&%(3N&
Regression
Regression is used to determine whether two variables are related. A highly used regression
method is Pearson's r. The r statistic has a range of values from -1.00 (a perfect negative
correlation) to 1.00 (a perfect positive correlation). A negative correlation means that as one
variable increases in size, the other decreases. A positive correlation means that as one variable
increases so does the other. When r=0.00 there is no relationship between the two variables.
This test has the same three assumptions as other parametric analyses, but it also has the
additional assumption that the relationship between the two variables is linear. A regression
analysis also gives a coefficient of determination (R²). The coefficient of determination has a
range of values from 0%-100% and explains how much of the variation in the dependent variable
is because of the independent variable.
Nonparametric statistics
Most nonparametric statistics are simple to use, do not require large data sets, and have few
underlying assumptions. They are not as powerful as parametric statistics (i.e. they are not very
good at detecting small differences between groups). Non-parametric tests all assume
independence of observations. In general, these tests should be chosen over parametric
alternatives when sample sizes are small (less than 10-20 replicates). We will use three
non-parametric tests in this course.
Wilcoxon's Rank Sum Test and the Mann-Whitney U Test
These analyses are used to test for differences between two treatment groups and are
analogous to a t-test.
Kruskal-Wallis Test or adonis
These tests look for differences between more than two different treatment groups. They're
basically nonparametric ANOVAs.
Spearman's Correlation
This analysis is a non-parametric regression analysis.
Choosing+a+Test+
O32"<&,/&%&I2"<A9%(+&<3&<,22&./3&+"&932;&./&;,AP&<9,A9&+3/+&+"&./3&K63(<,3#5&lcdfLN&
&
References
Everitt BS and Hothorn T. 2010. A handbook of statistical analyses using R. CRC Press.
Gerwien R. 2016. A painless guide to statistics.
http://abacus.bates.edu/~ganderso/biology/resources/statistics.html
Lender N, et al. 2014. Alimentary Pharmacology and Therapeutics. 40:24-31.
Rutten RBMM, et al. 2015. PLoS ONE. 10: e0137681.
Streit M and Gehlenborg N. 2014. Nature Methods. 11:117.
Using+R+Studio+
!9,/&+.+"(,%2&<,22&932;&./&23%(#&9"<&+"&./3&@B+.),"N&
RStudio+interface+
!93&I,(/+&+,F3&<3&";3#&@B+.),"&<3&%(3&H(33+3)&-$&+9(33&;%#32/N&!93&23I+&9%2I&"I&+93&/A(33#&,/&+93&
A"#/"23N&!93&.;;3(&(,H9+&A"(#3(&,/&+93&?%(,%-23&,#/;3A+"(5&%#)&+93&2"<3(&(,H9+&A"(#3(&A%#&/9"<&
$".&),II3(3#+&+9,#H/&)3;3#),#H&"#&<9,A9&+%-&,/&/323A+3)N&!93&)3I%.2+&I"(&+9,/&;%#32&,/&%&I,23&
?,3<3(N&>3+`/&H"&+9(".H9&3%A9&;%#32&F"(3&,#&)3;+9N&
&
This:is:what:RStudio:looks:like:
Console+
!93&A"#/"23&,/&2,P3&".(&+3(F,#%2N&=3(3&<3&A%#&+$;3&A"FF%#)/&%#)&@&<,22&;3(I"(F&+93FN&!93&
+$;3/&"I&A"FF%#)/&<3&./3&,#&+93&@&A"#/"23&%(35&I"(&+93&F"/+&;%(+5&/;3A,I,A&+"&+93&@&A"),#H&
2%#H.%H3N&!9,/&2%#H.%H3&,/&),II3(3#+&+9%#&+93&9"<&$".&<".2)&<(,+3&A"FF%#)/&I"(&$".(&+3(F,#%2N&&
Environment/Variable+inspector+
7/&<3&2"%)&%#)&F%#,;.2%+3&)%+%5&<3&A%#&/+"(3&+93&)%+%&%/&%&`?%(,%-23`N&!93&3#?,("#F3#+&<,#)"<&
/9"</&./&<9,A9&?%(,%-23/&<3&9%?3&A(3%+3)5&%#)&<3&A%#&%A+.%22$&2""P&+"&/33&<9%+&+93$&%(3N&
Q"+,A3&+9,/&;%#32&%2/"&9%/&%&9,/+"($&+%-&<93(3&$".&A%#&/33&%22&"I&+93&(3A3#+&A"FF%#)/&$".&9%?3&
;3(I"(F3)N&&
File+System+
!9,/&,/&U./+&2,P3&+93&`I,#)3(`&"#&%&4%A&"(&+93&`<,#)"</&3D;2"(3(`&"#&%&X*N&!9,/&2,/+/&".(&I,23/&%#)&
I"2)3(/N&Q"+,A3&+9,/&;%#32&9%/&"+93(&+%-/&%/&<322N&!93&;2"+/&+%-&<,22&/9"<&./&<9%+&".(&;2"+/&2""P&
2,P3&%/&<3&A(3%+3&+93F5&+93&;%AP%H3/&+%-&/9"</&./&%22&"I&+93&;%AP%H3/&K/3+/&"I&I.#A+,"#/L&$".&
A%#&,F;"(+&<93#&(.##,#H&$".(&%#%2$/,/N&!93&?,3<3(&+%-&,/&I"(&F"(3&%)?%#A3)&,#+3(%A+,?3&
H(%;9,A/&%#)&<"#`+&-3&./3)&,#&+9,/&A".(/3N&&
Dropdown+menus+
!93/3&%(3&+93&";+,"#/&2,/+3)&%A("//&+93&+";&"I&@B+.),"&KU./+&2,P3&F"/+&"+93(&;("H(%F/&"#&".(&
A"F;.+3(/LN&!93/3&F3#./&,#A2.)3&F%#$&";+,"#/&$".&F,H9+&#33)N&S"(&3D%F;235&S,235&B%?3&<,22&
/%?3&".(&<"(PN&
Using+RStudio+
!"&-3H,#&./,#H&@/+.),"&%22&<3&9%?3&+"&)"&,/&+$;3&%&A"FF%#)&,#+"&+93&A"#/"23N&S"(&3D%F;235&23+`/&
F%P3&%&?%(,%-23&A%223)&test_variableN&7#)&,#&+9,/&?%(,%-23&<3&<,22&/+"(3&/"F3&)%+%N&!93&)%+%&
<,22&-3&%&/3+&"I&<"()/&Ki-,()i5&i)"Hi5&iA%+iLN&'3&%2/"&A%22&<"()/&/+(,#H/5&-3A%./3&+93&/+(,#H&"I&
A9%(%A+3(/&)"3/#`+&#3A3//%(,2$&9%?3&+"&-3&%#&%A+.%2&<"()N&S"(&3D%F;235&i-,()i&,/&%&/+(,#H&%#)&/"&
,/&i%-A)iN&
# Store three strings in a character vector named test_variable
test_variable <- c("bird", "dog", "cat")
Q"+,A3&%&A".;23&"I&+9,#H/0&
dN +93&%(("<&<-&,/&<9%+&%//,H#/&+93&)%+%&+"&%&?%(,%-23N&Y".&A%#&(3%)&+93&<9"23&A"FF%#)&2,P3&
+9,/0&i+3/+g?%(,%-23&H3+/&%&?3A+"(&A"#+%,#,#H&+93&/+(,#H/&-,()5&)"H5&A%+i&
lN '93#&<3&<%#+&+"&H(".;&+9,#H/&,#+"&"#3&?%(,%-23&<3&A%#&./3&c()5&<9,A9&,/&%&I.#A+,"#&+9%+&
A"F-,#3/&?%2.3/&,#+"&%&?3A+"(&"(&2,/+N&
VN '93#&<3&<%#+&+"&/+"(3&/+(,#H/&<3&9%?3&+"&/;3A,I$&,+`/&%&/+(,#H&./,#H&Z."+3/&""N&'3&A%#&./3&
3,+93(&)".-23&""&"(&/,#H23&''&Z."+3/&Ki-,()i&"(&`-,()`LN&JI&<3&+(,3)&+"&./3&c(bird, dog,
cat)&,#/+3%)&"I&c("bird", "dog", "cat")&@&<".2)&(3%)&%#)&+($&+"&,#+3(;(3+&-,()5&)"H&
%#)&A%+&+"&-3&?%(,%-23/5&#"+&/+(,#H/N&
\#A3&$".&9%?3&A(3%+3)&+9,/&?%(,%-23&$".&<,22&-3&%-23&+"&/33&,+&,#&$".(&3#?,("#F3#+&;%#32&"#&+93&
(,H9+&/,)3&"I&@B+.),"N&J+&+322/&./&+9%+&,#&test_variable&A"#+%,#/&A9%(%A+3(/&KchrL5&+93(3&%(3&
+9(33&A9%(%A+3(&?%2.3/&/+"(3)&[1:3]5&%#)&+93$&%(3&"bird", "dog" and "cat"N&&
Using+Existing+R+Code+
J#&%)),+,"#&+"&+$;,#H&+9,#H/&),(3A+2$&,#+"&+93&A"#/"235&<3&A%#&(.#&@&A")3&I("F&3D,/+,#H&I,23/N&!"&
)"&+9,/&<3&#33)&+"&";3#&%&I,23&+9%+&A"#+%,#/&@&A")3N&>3+`/&";3#&+93&J#+("8@N(&I,23&,#&@&/+.),"N&!"&
)"&+9%+&$".&A%#&H"&+"&S,235&\;3#&S,235&%#)&I,#)&+93&`J#+("8@N(`&I,23&+9%+&$".&A%#&)"<#2"%)&I("F&
4"")23N&!9,/&I,23&<,22&";3#&,#&%&#3<&;%#32&%-"?3&+93&A"#/"23N&
!"&(.#&+93&A")3&I("F&+9,/&I,23&<3&9%?3&+<"&";+,"#/N&
dN *";$&%#)&;%/+3&+93&A")3&,#+"&+93&A"#/"23&%#)&;(3//&3#+3(&
lN @.#&+93&A")3&),(3A+2$&I("F&+93&I,23N&!"&)"&+9,/&<3&A%#&;2%A3&".(&A.(/"(&"#&+93&2,#3&"I&+93&I,23&
<3&<%#+&+"&(.#N&B"5&;2%A3&$".(&A.(/"(&"#&+93&test_variable2 <- c("bacteria",
"fungi")&2,#3N&S"(&4%A&./3(/5&$".&A%#&(.#&+9,/&A"FF%#)&-$&9"2),#H&)"<#&$".(&`A"FF%#)`&
P3$&%#)&;(3//,#H&3#+3(N&S"(&X*&./3(/5&$".&A%#&(.#&+9,/&A"FF%#)&-$&9"2),#H&)"<#&$".(&
`A"#+("2`&P3$&%#)&;(3//,#H&3#+3(N&7#"+93(&";+,"#&,/&+"&;2%A3&".(&A.(/"(&"#&+93&2,#3&<3&<%#+&
+"&(.#&%#)&+93#&;(3//,#H&+93&Run&-.++"#&"#&+93&.;;3(&(,H9+&/,)3&"I&+93&;%#32N&'3&A%#&(.#&
F%#$&2,#3/&"I&A")35&"#3&2,#3&%+&%&+,F3&-$&9,H92,H9+,#H&%22&+93&A")3&<3&<%#+&+"&(.#&,#&+93&
I,235&%#)&./,#H&+93&P3$&A"F-,#%+,"#/&F3#+,"#3)&%-"?3N&&
Saving+R+Code+
'93#&<3&)"&%#&%#%2$/,/&,#&@5&<3&)3I,#,+32$&<%#+&+"&/%?3&".(&A")3&/"&<3&A%#&./3&,+&,#&+93&I.+.(3N&
JI&<3&%(3&<"(P,#H&I("F&%#&3D,/+,#H&I,235&<3&A%#&";3#&+93&I,23&%#)&%))&#3<&A")3&%/&<3&<(,+3&,+N&JI&
<3&%(3&/+%(+,#H&%&#3<&I,235&<3&A%#&./3&S,235&Q3<&S,235&@&/A(,;+&+"&A(3%+3&%&#3<&I,23N&7/&<3&(.#&
A"FF%#)/5&+93$&H3+&/+"(3)&,#&+93&`=,/+"($`&+%-&"I&".(&`1#?,("#F3#+Ra%(,%-23&J#/;3A+"(`&K.;;3(&
(,H9+&;%#32LN&JI&<3&(.#&%&A"FF%#)5&%#)&,+&/.AA3//I.22$&)"3/&<9%+&<3&<%#+&,+&+"5&<3&A%#&A2,AP&"#&
+93&A"FF%#)&,#&+93&`=,/+"($`&+%-5&%#)&+93#&A2,AP&`!"&B".(A3`&+"&%))&,+&+"&".(&I,23N&
7/&<3&<(,+3&%#)&/%?3&".(&A")35&<3&F./+&(3F3F-3(&+"&A"FF3#+&,+N&\.(&A"FF3#+/&<,22&-3&./3)&
-$&%#$"#3&(3%),#H&+93&A")3&+"&I,H.(3&".+&<9%+&<%/&)"#3N&\.(&A"FF3#+/&/9".2)0&
dN O3&A2"/3&+"&+93&A")3&<3&%(3&/;3A,I,A%22$&A"FF3#+,#H&K#"+&U./+&%+&+93&+";&"I&+93&I,23L&
lN O3&A23%(&%#)&A"#A,/3&
VN *%;+.(3&,#+3#+&
@3F3F-3(&+9%+&+93&A"FF3#+&/$F-"2&,/&#5&%#)&,+&,/&2,#38/;3A,I,AN&
=3(3&,/&%#&3D%F;23&"I&A"FF3#+3)&A")30&
# This stores the sum of 2, 4, 6 and 8 as a variable 'sum_numbers'
sum_numbers <- sum(2, 4, 6, 8)

# This stores the square root of sum_numbers as a
# variable 'sqrt_numbers'
sqrt_numbers <- sqrt(sum_numbers)
Y".(&A"FF3#+/&)"#`+&9%?3&+"&-3&3?3($&2,#35&-.+&/9".2)&-3&3%/,2$&,#+3(;(3+3)N&JI&+93(3&%(3&+(,AP$&
;%(%F3+3(/&,#&$".(&I.#A+,"#/&+9,/&,/&%&H"")&<%$&+"&(3F,#)&$".(/32I&<9$&$".&9%?3&+"&/;3A,I$&
A3(+%,#&+9,#H/N&72/"5&<93#&#%F,#H&?%(,%-23/5&F%P3&/.(3&+"&./3&%&)3/A(,;+,?3&#%F3&+9%+&(3I23A+/&
<9%+&+93&?%(,%-23&,/&/+"(,#HN&
!"&/%?3&".(&@&A")3&I,23/5&<3&A%#&./3&S,235&B%?3&"(&`A"FF%#)`&/&K4%AL5&"(&`A"#+("2`&/&KX*LN&
Important+Concepts+
Objects+
\-U3A+/&%(3&+93&;,3A3/&"I&)%+%&/+"(3)&%/&?%(,%-23/&,#&@N&!93(3&%(3&),II3(3#+&+$;3/&"I&"-U3A+/N&'3&
%2(3%)$&F3#+,"#3)&"#3&+$;35&`A9%(%A+3(`5&<9,A9&,#A2.)3/&23++3(/&%#)&/+(,#H/N&\+93(&+$;3/&"I&
"-U3A+/&<3&<,22&./3&,#&+9,/&A2%//&,#A2.)3&`2"H,A%2`5&<9,A9&%(3&3,+93(&!(.3&"(&S%2/35&`,#+3H3(`5&<9,A9&
%(3&,#+3H3(/5&%#)&F%#$&"+93(/N&&
Variables+
a%(,%-23/&%(3&<9%+&<3&/+"(3&".(&)%+%&%/&,#&@N&'3&#%F3&3%A9&?%(,%-23&K,#&".(&I,(/+&3D%F;235&,+&
<%/&`+3/+g?%(,%-23`L5&%#)&+93(3&%(3&),II3(3#+&+$;3/&"I&?%(,%-23/N&B"F3&+$;3/&,#A2.)3&+93&
I"22"<,#H0&
K)40#"(&!93/3&A%#&-3&A"#/,)3(3)&%&H(".;&"I&)%+%N&!93(3&%(3&),II3(3#+&+$;3/&"I&?3A+"(/5&/"F3&
+9%+&<3&<,22&./3&,#&+9,/&A".(/3&,#A2.)30&`2"H,A%2`&K+(.3/&"(&I%2/3/L5&`,#+3H3(`&K#.F-3(/L5&%#)&
`A9%(%A+3(`&K/+(,#H/LN&
vector_1 <- c(1, 2, 5.3, 6, -2, 4) # numeric vector
vector_2 <- c("one", "two", "three") # character vector
vector_3 <- c(TRUE, TRUE, TRUE, FALSE, TRUE, FALSE) # logical vector
A6(0(&!93/3&%(3&P,#)&"I&2,P3&?3A+"(/5&-.+&+93&"-U3A+/&/+"(3)&,#&+93&2,/+&)"&#"+&9%?3&+"&-3&+93&/%F3&
+$;3N&
# Use list(), not c(): c() would coerce every element to a single type
# (here all three would become character strings), whereas list() keeps
# "one" as character, 2 as numeric and TRUE as logical.
list_1 <- list("one", 2, TRUE)
71,406#,(&!93/3&%(3&?%(,%-23/&+9%+&;3(I"(F&%&+%/PN&S"(&3D%F;23&c()&,/&%&I.#A+,"#&+9%+&A"F-,#3/&
"-U3A+/&,#+"&%&?3A+"(N&@&A"F3/&<,+9&F%#$&I.#A+,"#/5&%#)&<3&<(,+3&+93F&<,+9&+93,(&#%F3&
I"22"<3)&-$&;%(3#+93/3/N&
# length() is a function that will tell us the length of something.
# It needs something to measure inside the parentheses, e.g. a vector:
length(c("bird", "dog", "cat")) # returns 3
I'AA&a%(,%-23/&+9%+&%(3&Q[>>&A"#+%,#&#"+9,#H5&%#)&%(3&#"+&"I&%&/;3A,I,A&+$;3N&JI&<3&A(3%+3&%&
Q[>>&?%(,%-235&,+&<,22&-3&2,/+3)&,#&".(&3#?,("#F3#+&-.+&,+&<,22&9%?3&#"&%++(,-.+3/N&
# A NULL variable holds nothing and has no type-specific attributes
nothing <- NULL
3%0"64)(&7&F%+(,D&,/&%&+%-235&<93(3&%22&+93&A"2.F#/&,#&+93&F%+(,D&F./+&9%?3&+93&/%F3&F")3&
K#.F3(,A5&A9%(%A+3(5&3+ANL&%#)&+93&/%F3&23#H+9N&
# Generates a 5 x 4 numeric matrix holding 1..20, filled column by column
test_matrix <- matrix(1:20, nrow = 5, ncol = 4)
*%0%97"%&)(&7&)%+%&I(%F3&,/&F"(3&H3#3(%2&+9%#&%&F%+(,D5&,#&+9%+&),II3(3#+&A"2.F#/&A%#&9%?3&
),II3(3#+&F")3/&K#.F3(,A5&A9%(%A+3(5&I%A+"(5&3+ANLN&
# This will make a data frame where the columns are filled with
# the vectors 'd', 'e' and 'f' (one numeric, one character, one logical)
d <- c(1, 2, 3, 4)
e <- c("red", "white", "red", NA)
f <- c(TRUE, TRUE, TRUE, FALSE)
test_dataframe <- data.frame(d, e, f)
7%40#"(&'3&A%#&+322&@&+9%+&%&?%(,%-23&,/&#"F,#%2&-$&F%P,#H&,+&%&I%A+"(N&!93&I%A+"(&/+"(3/&+93&
#"F,#%2&?%2.3/&%/&%&?3A+"(&"I&,#+3H3(/&%#)&%#&,#+3(#%2&?3A+"(&"I&A9%(%A+3(&/+(,#H/&K+93&"(,H,#%2&
?%2.3/L&F%;;3)&+"&+93/3&,#+3H3(/N&
# Let's say there is a group of people, 3 female and 2 male.
# Let's make a vector that stores whether each person is
# female (F) or male (M)
gender <- c("F", "F", "M", "M", "F")

# Stores gender as a factor where 1 = female ("F"), 2 = male ("M");
# levels are numbered in alphabetical order
gender <- factor(gender)
# R now treats gender as a nominal variable
Operators+
G%+%&,/&F%#,;.2%+3)&,#&;("H(%F/&./,#H&";3(%+"(/&%#)&I.#A+,"#/N&@&9%/&F%#$&-.,2+8,#&";3(%+"(/5&
+93&F"/+&A"FF"#2$&./3)&,#A2.)30&
G"60B&)0649#N)"%0#"(&
u&Q.F3(,A%2&A%2A.2%+,"#/&K;(3/3(?,#H&+93&"()3(&"I&";3(%+,"#/L&
&y&7)),+,"#&+&
&y&B.-+(%A+,"#RA9%#H3&/,H#&-&
&y&4.2+,;2,A%+,"#&*&
&y&G,?,/,"#&/&
?)-%06#,%-9#N)"%0#"(&
u&*"F;%(,#H&?%2.3/&
&y&>3//&+9%#&<&
&y&>3//&+9%#&"(&3Z.%2&+"&<=&
&y&6(3%+3(&+9%#&>&
&y&6(3%+3(&+9%#&"(&3Z.%2&+"&>=&
&y&1Z.%2&+"&==&
&y&Q"+&3Z.%2&+"&!=&
G((6$,&),09#N)"%0#"(&
u&7//,H#,#H&?%2.3/&+"&"-U3A+/&
 o Standard assignment (you will generally use this one) <-
 o Alternative assignment; also how arguments are named inside function calls =
A#$64%-9#N)"%0#"(&
u&*"#U.#A+,"#/&I"(&A"F-,#,#HR3DA2.),#H&+3(F/&
&y&7QG&&&&
&y&\@&||&
&y&Q\!&!&
/#-#,9#N)"%0#"&
u&*(3%+,#H&(3H.2%(&/3Z.3#A3/&K"I+3#&"I&#.F-3(/L&
&y&:&3D%F;230&3:7&;(").A3/&+93&".+;.+&[1] 3 4 5 6 7&
Q"<&./3&+93&(3/+&"I&+93&A")3&,#&+93&`J#+("8@N(`&I,23&+"&F%P3&%#)&,#/;3A+&),II3(3#+&?%(,%-23&+$;3/&
%#)&";3(%+"(/N&
Loading+Tables+in+R+
JI&$".&(3F3F-3(&<3&H3#3(%+3)&V&+$;3/&"I&+%-23/&<,+9&hJJ410&
u&\![&+%-23&KN-,"F&%#)&N+D+&?3(/,"#/&8&(%(3I,3)&%#)&2"<8)3;+9&I,2+3(3)L&u&72;9%&),?3(/,+$&+%-23&
KN+D+L&u&O3+%&),?3(/,+$&+%-23/&KN+D+L&
OTU+Table+
7#"&%09
!93&I,(/+&+<"&2,#3/&,#A2.)3&%&/;%A3(&2,#3&)3+%,2,#H&9"<&+93&I,23&<%/&"#A3&%&N-,"F&I"(F%+5&%#)&+93&
A"2.F#&93%)3(/N&Q"+3&+9%+&+93/3&2,#3/&/+%(+&<,+9&%&`j`5&<9,A9&./.%22$&(3;(3/3#+/&%&A"FF3#+&2,#3&
K/"F3+9,#H&+93&A"F;.+3(&)"3/#`+&(3%)L5&/"&<3&<,22&9%?3&+"&;%$&%++3#+,"#&+"&9"<&@&(3%)/&".(&
\![&+%-23N&
?#+(&
\![&JG5&<9,A9&,/&%&.#,Z.3&JG&I"(&3%A9&/3+&"I&/3Z.3#A3/&+9%+&%(3&ext&,)3#+,A%2N&
/#-1&,(9W90B"#1$B90B)9()4#,.9-%(0&
1%A9&A"2.F#&(3;(3/3#+/&%&/%F;23N&!93&#.F-3(/&,#&3%A9&("<&A"((3/;"#)&+"&+93&#.F-3(&"I&(3%)/&
+9%+&F%;;3)&+"&+93&/;3A,I,3)&\![&JG&,#&+93&I,(/+&A"2.F#N&
A%(09/#-1&,&
!93&%//,H#3)&+%D"#"F,A&,)3#+,+$&I"(&3%A9&\![&K3NHN&S"(&PggO%A+3(,%v&;ggO%A+3(",)3+3/v&
AggO%A+3(",),%v&"ggO%A+3(",)%23/v&IggX(3?"+322%A3%3v&HggX(3?"+322%v&/ggA";(,LN&P&k&P,#H)"F5&+93&
;&k&;9$2.F5&A&k&A2%//5&"&k&"()3(5&I&k&I%F,2$5&H&k&H3#./&%#)&/k&/;3A,3/N&
B33&3D%F;23&"I&+93&I,(/+&b&2,#3/&"I&%#&\![&+%-23&+9%+&,/&,#&+93&(3Z.,(3)&I"(F%+0&
&
alt:text:
A#%.6,$9Q='9=%<-)9
S,(/+5&./3&+93&I.#A+,"#&read.table()&+"&(3%)&,#&$".(&\![&+%-23N&!93/3&?%(,"./&%(H.F3#+/&%(3&
%22&/3+&/;3A,I,A%22$&I"(&+93&I"(F%+&"I&$".(&\![&+%-23&,#&N+D+&I"(F%+N&
comment =&,/&+322,#H&@&<9%+&/9".2)&-3&,#+3(;(3+3)&%/&%&A"FF3#+&?3(/./&%/&%&2,#3&"I&A")3N&!93&
)3I%.2+&I"(&+9,/&,/&+93&;".#)&/,H#&`j`&-.+&/,#A3&<3&<%#+&+93&A"2.F#&93%)3(&,#I"(F%+,"#&<3&+.(#&
"II&+93&,#+3(;(3+%+,"#&"I&A"FF3#+/&./,#H&+93&";+,"#&comment = ''&
header =&,/&+322,#H&@&<93+93(&+93&I,(/+&2,#3&"I&A")3&/9".2)&-3&%//,H#3)&%/&("<&d&"(&%/&+93&
A"2.F#&#%F3/N&'3&/3+&+9,/&+"&TRUE&"(&TN&
sep =&)3I,#3/&+93&I,32)&/3;%(%+"(&A9%(%A+3(&<9,A9&,#&+9,/&A%/3&,/&%&+%-5&/"&sep = '\t'&
skip =&+322/&@&9"<&F%#$&("</&+"&/P,;&<93#&(3%),#H&,#&+93&+%-23N&!93&)3I%.2+&I"(&+9,/&,/&c5&-.+&,#&
+9,/&A%/35&<3&<%#+&+"&,H#"(3&+93&I,(/+&2,#3&`j&*"#/+(.A+3)&I("F&-,"F&I,23`&/"&<3&/P,;&+93&I,(/+&2,#3N&
as.is =&A"#+("2/&+93&,#+3(;(3+%+,"#&"I&A9%(%A+3(&?%(,%-23/&%/&%&A9%(%A+3(&/+(,#H&?/N&%/&%&I%A+"(N&
!"&%?",)&9%?,#H&+9"./%#)/&"I&23?32/&%//"A,%+3)&<,+9&".(&+%D"#"F$&A"2.F#5&<3&/;3A,I$&as.is=T&
check.names =&)3+3(F,#3/&<93+93(&+93&#%F3/&"I&?%(,%-23&,#&+93&)%+%&I(%F3&%(3&/$#+%A+,A%22$&
?%2,)N&O3A%./3&".(&/%F;23&#%F3/&,#&".(&)%+%&/3+&/+%(+&<,+9&#.F-3(/5&<9,A9&<".2)&A%./3&
;("-23F/&,#&@5&<3&9%?3&+"&/3+&check.names=F&
row.names = (which R also accepts abbreviated as row =) will tell R if we would like to set one of
the columns to be the row names. In this case we would like to set the first column, which is the
OTU IDs, to be the row names. (row.names = 1)
Y".&<,22&9%?3&+"&A9%#H3&+93&#%F3&"I&+93&\![&+%-23&+"&-3&+93&#%F3&"I&$".(&+%-23N&
# Now we can read in the table - this is the rarefied one.
# Argument names are spelled out in full (comment.char, row.names); the
# shorter `comment =` / `row =` forms only work through partial matching.
#   comment.char = ""    do not treat '#' lines as comments (keeps the header)
#   header = TRUE        first line read becomes the column names
#   sep = "\t"           the file is tab-delimited
#   skip = 1             skip the first line of the file
#   as.is = TRUE         keep character columns (taxonomy) as strings
#   check.names = FALSE  leave sample names unchanged
#   row.names = 1        use the first column (OTU IDs) as row names
otu <- read.table("otu_rare2000.txt",
                  comment.char = "",
                  header = TRUE,
                  sep = "\t",
                  skip = 1,
                  as.is = TRUE,
                  check.names = FALSE,
                  row.names = 1)

# Read in the low depth removed OTU table
# NOTE(review): this reads the same file as `otu` above ("otu_rare2000.txt"),
# which looks like a copy-paste slip -- change the file name to your
# low-depth-filtered OTU table before relying on `otu_low`.
otu_low <- read.table("otu_rare2000.txt",
                      comment.char = "",
                      header = TRUE,
                      sep = "\t",
                      skip = 1,
                      as.is = TRUE,
                      check.names = FALSE,
                      row.names = 1)
@3F3F-3(5&$".&A%#&%2<%$/&I,#)&".+&F"(3&%-".+&%&I.#A+,"#&-$&./,#H&+93&932;KL&I.#A+,"#&"(&+93&~N&
B"5&+"&I,#)&".+&%-".+&+93&read.table()&I.#A+,"#5&$".&A".2)&)"&+93&I"22"<,#H0&
# Open the help page for read.table
?read.table
!"&I,#)&".+&,#I"(F%+,"#&%-".+&".(&+%-23&<3&A%#&./3&),II3(3#+&I.#A+,"#/N&S"(&3D%F;235&<3&A%#&
I,#)&".+&+93&("<&#%F3/&%#)&A"2.F#&#%F3/&./,#H&rownames()&%#)&colnames()5&(3/;3A+,?32$N&
'3&A%#&I,#)&+93&),F3#/,"#/&<,+9&dim()5&%#)&<3&A%#&;(,#+&+93&I,(/+&A".;23&"I&2,#3/&K)3I%.2+&,/&dcL&
<,+9&head()N&'3&A%#&%2/"&A2,AP&"#&".(&+%-23&,#&+93&1#?,("#F3#+&;%#32&+"&?,3<&+93&<9"23&+%-23N&
# View first 2 lines using head()
head(otu, n = 2)

# View dimensions (number of rows, number of columns)
dim(otu)

# Print row names (which are OTU IDs)
rownames(otu)

# Print column names (which are sample IDs and the taxonomy header)
colnames(otu)
Alpha+Diversity+File+
X(,"(&+"&;2"++,#H&,#&@5&<3&#33)&+"&H3#3(%+3&%#&%2;9%&),?3(/,+$&+%-23&,#&hJJ41N&!9,/&I,23&<,22&-3&+93&
".+;.+&"I&alpha_diversity.py5&%#)&<,22&-3&%&+%-8)32,F,+3)5&;2%,#&+3D+&I,23N&!93&I"(F%+&I"(&+93&
%2;9%&),?3(/,+$&I,23&,/&+93&I"22"<,#H0&
7#"&%09
?#+(&
!93&("</&%(3&+93&/%F;23&JG/N&
/#-1&,(&
1%A9&A"2.F#&(3;(3/3#+/&%&),?3(/,+$&F3+(,A&K3NHN&PD_whole_tree5&simpson5&shannon5&"(&
observed_speciesLN&!93&#.F-3(/&,#&3%A9&("<&A"((3/;"#)&+"&%2;9%&),?3(/,+$&3/+,F%+3&I"(&+93&
%//"A,%+3)&/%F;23N&
A#%.6,$9G-NB%9*6E)"(6029
# Read in the alpha diversity table.
# header = TRUE makes the first row (diversity metrics) the column names;
# row.names = 1 makes the first column (sample IDs) the row names.
alpha <- read.table("Alpha_Div.txt",
                    sep = "\t",
                    header = TRUE,
                    as.is = TRUE,
                    check.names = FALSE,
                    row.names = 1)
I#064)M&
u&'3&/3+&+93&93%)3(&+"&-3&+93&I,(/+&("<&K%2;9%&),?3(/,+$&F3+(,A/L&
u&'3&/3+&+93&("<#%F3/&+"&-3&+93&I,(/+&A"2.F#&K/%F;23&JG/L&
Beta+Diversity+File+
Prior to plotting in R, we need to generate a distance matrix with QIIME. This
file will be the output of beta_diversity.py, and will be a tab-delimited, plain text file. The
format for the beta diversity file is the following:
7#"&%09
?#+(&
!93&("</&%(3&+93&/%F;23&JG/N&
/#-1&,(&
1%A9&A"2.F#&,/&%2/"&%&/%F;23&JG&%#)&+93&),/+%#A3/&I("F&"#3&/%F;23&+"&%#"+93(&%(3&+93&?%2.3/N&
Y".&/9".2)&9%?3&"#3&I"(&3%A9&F3+(,A&$".&./3)&K[#<3,H9+3)&[#,S(%A5&'3,H9+3)&[#,S(%A5&%#)&
O(%$8*.(+,/LN&
# Load the beta diversity matrix. Notice that we use read.table(),
# but then change from a data frame to a matrix with as.matrix().
# Sample IDs end up as both the row names and the column names.
beta <- as.matrix(read.table("unweighted_unifrac_dm.txt",
                             sep = "\t",
                             header = TRUE,
                             row.names = 1,
                             as.is = TRUE,
                             check.names = FALSE))
I#064)M&
u&'3&/3+&+93&93%)3(&+"&-3&+93&I,(/+&("<&K!93/3&%(3&/%F;23&JG/L&
u&'3&/3+&+93&("</&#%F3/&+"&-3&+93&I,(/+&A"2.F#&K!93/3&%(3&%2/"&/%F;23&JG/L&
Metadata+File+
Y".(&F3+%)%+%&I,23&K%2/"&A%223)&%&F%;;,#H&I,23L&,/&%&)%+%&+%-23&A"#+%,#,#H&,#I"(F%+,"#&%-".+&+93&
/%F;23/&,#&$".(&)%+%/3+N&J#&"()3(&+"&%//3//&9"<&+%D%&A"((32%+3&<,+9&?%(,%-23/&"I&,#+3(3/+&K3NHN&
A".#+($5&-")$&/,+35&/;3A,3/5&3A"(3H,"#5&O4J5&3+ANL5&<3&#33)&+"&9%?3&+9%+&,#I"(F%+,"#&%-".+&".(&
/%F;23/&%AA3//,-23N&!93&F3+%)%+%&I,23&I"(&".(&)%+%&/3+&,/&HMP_5BS_metadata.txtN&
7#"&%09
?#+(&
!93&%A+.%2&F%;;,#H&I,23&/+%(+/&<,+9&`jB%F;23JG`&%/&+93&I,(/+&93%)3(N&!9,/&A"#+%,#/&%&+93&/%F;23&
JG/5&<9,A9&%(3&.#,Z.3&JG/&I"(&3%A9&/%F;23&,#&+93&)%+%/3+N&!"&<"(P&,#&hJJ415&+9,/&F./+&9%?3&%&`j`&
%+&+93&/+%(+N&@3F3F-3(&+9%+&`j`&./.%22$&(3;(3/3#+/&%&A"FF3#+&2,#3&K/"F3+9,#H&+93&A"F;.+3(&
)"3/#`+&(3%)L5&/"&<3&<,22&9%?3&+"&;%$&%++3#+,"#&+"&9"<&@&(3%)/&,#&+9,/&I,23N&
/#-1&,(9W9S9-%(094#-1&,&
1%A9&A"2.F#&(3;(3/3#+/&%&)3/A(,;+,"#&"I&+93&/%F;23N&J+&A%#&-3&%#$+9,#H&,#A2.),#H&)3+%,2/&%-".+&
+93&;%+,3#+5&;3(/"#5&%#,F%2&"(&2"A%+,"#&+93&/%F;23&<%/&+%P3#&I("FN&!9,/&I,23&/9".2)&A"#+%,#&#"&
/;%A3/&"(&3F;+$&A"2.F#/R("</N&
A#%.6,$93)0%.%0%976-)9
'3&2"%)&+93&F3+%)%+%&+%-23&U./+&2,P3&+93&\![&+%-235&-.+&#"+,A3&+9%+&+93&skip&;%(%F3+3(&,/&23I+&
".+5&-3A%./3&+93&F3+%)%+%&+%-23&)"3/#`+&9%?3&+93&%)),+,"#%2&I,(/+&2,#3&+9%+&+93&\![&+%-23&9%/N&
# Load the metadata (mapping) file.
# No skip argument: this file has no extra first line before the header.
# comment.char = "" keeps the '#SampleID' header line from being dropped.
# stringsAsFactors = TRUE makes text columns (e.g. Sex, BodySite) factors
# on every R version; it was the default before R 4.0, and the factor
# output shown in this manual depends on it.
metadata <- read.table("HMP_5BS_metadata.txt",
                       header = TRUE,
                       sep = "\t",
                       check.names = FALSE,
                       comment.char = "",
                       row.names = 1,
                       stringsAsFactors = TRUE)
I#064)M&
u&'3&/3+&+93&93%)3(&+"&-3&+93&I,(/+&("<&K!93/3&%(3&/%F;23&JG/L&
u&'3&/3+&+93&("</&#%F3/&+"&-3&+93&I,(/+&A"2.F#&K!93/3&%(3&%2/"&/%F;23&JG/L&
u&'3&+"2)&@&+"&,H#"(3&+93&`j`&,#&+93&I,(/+&2,#3&
'9%+&%(3&+93&),F3#/,"#/&"I&+93&F3+%)%+%&I,23~&="<&<".2)&$".&I,#)&+9,/&".+~&'3&<3#+&"?3(&+9,/&
,#&+93&`Q"(F%2,M,#H&\![&!%-23&+.+"(,%2`&
'9%+&?%(,%-23/&)"&<3&9%?3&%?%,2%-23&I"(&+9,/&)%+%&/3+~&!93$&%(3&+93&A"2.F#&93%)3(/N&Y".&A%#&
I,#)&+9,/&".+&./,#H&colnames()N&
# List the variables (column headers) available in the metadata
colnames(metadata)
## [1] "BarcodeSequence" "LinkerPrimerSequence" "Sex" &
## [4] "BodySite" "SRS_SampleID" "FASTA_FILE" &
## [7] "Description" "Age"&
!93&F%;;,#H&I,23&,/&<9%+&<3&./3&,#&+93&F%U"(,+$&"I&".(&hJJ41&A"FF%#)/&/"&,+&A"#+%,#/&
,#I"(F%+,"#&%-".+&+93&/3Z.3#A,#H&I,23/&K3NHN&BarcodeSequence5&LinkerPrimerSequence5&
FASTA_FILE5&%#)&SRS_SampleIDL5&+9%+&%(3&#"+&#3A3//%($&I"(&".(&%#%2$/,/N&'3&#33)&+93&/%F;23&
JG/&+"&F%+A9&".(&?%(,%-23/&+"&+93&F,A("-,%2&%-.#)%#A3&,#I"(F%+,"#&A"#+%,#3)&,#&".(&\![&+%-23N&
1%A9&A"2.F#&,#&+9,/&I,23&,/&%&?%(,%-23&K%2/"&A%223)&%&A"?%(,%+3L5&<9,A9&A%#&-3&)3I,#3)&%/&-3,#H&
A"#+,#."./&"(&A%+3H"(,A%2N&*%+3H"(,A%2&?%(,%-23/&%(3&)3/A(,-3)&%/&I%A+"(/5&+93&23?32/&"I&<9,A9&%(3&
+93&A%+3H"(,3/&<,+9,#&,+N&Y".&A%#&?,3<&+93&#.F-3(&%#)&,)3#+,+$&"I&23?32/&I"(&%&A%+3H"(,A%2&
?%(,%-23&-$&A%22,#H&,+5&"(&./,#H&+93&str()&I.#A+,"#N&
# View the 'Sex' column of the mapping file data frame
# (row index left blank = all rows)
metadata[, "Sex"]
## [1] female female female female male female male male male
female&
## [11] female female male male female male male female female
female&
## [21] male male male male male female female female male male &
## [31] male male female male male female female male male
female&
## [41] female male female female female male female female female male &
## [51] female female male male female female female female male male &
## [61] male male female female male female female female female male &
## [71] male male female male male female male male male
female&
## [81] female male male female female female male male male male &
## [91] male female female female female male male female male
female&
## [101] female female male female&
## Levels: female male&
Q"+,A3&9"<&<3&<("+3&+93&A"FF%#)&+"&%AA3//&+93&`B3D`&A"2.F#N[,]&,/&%&<%$&+"&/;3A,I$&("</&%#)&
A"2.F#/&"(&%&F%+(,D&"(&)%+%I(%F3N&J#/,)3&+93&/Z.%(3&-(%AP3+/5&+93&I,(/+&,#)3D&/;3A,I,3)&,/&+93&
("<5&%#)&+93&/3A"#)&K%I+3(&+93&A"FF%L&,/&+93&A"2.F#N&B"&<9%+&<3&<("+3&<%/5&i),/;2%$&%22&+93&
("</&K23I+&-2%#PL5&,#&+93&`B3D`&A"2.F#5&"(&metadata[,'Sex']N&'3&A%#&%2/"&./3&+93&("<&#.F-3(&
"(&A"2.F#&#.F-3(&metadata[,4]N&
O3A%./3&".(&F%;;,#H&I,23&,/&2"%)3)&%/&%&)%+%I(%F35&<3&A%#&%2/"&)"&+9,/&./,#H&+93&i•iN&
# Notice that using "$" only works for data frames and not matrices
metadata$Sex
## [1] female female female female male female male male male
female&
## [11] female female male male female male male female female
female&
## [21] male male male male male female female female male male &
## [31] male male female male male female female male male
female&
## [41] female male female female female male female female female male &
## [51] female female male male female female female female male male &
## [61] male male female female male female female female female male &
## [71] male male female male male female male male male
female&
## [81] female male male female female female male male male male &
## [91] male female female female female male male female male
female&
## [101] female female male female&
## Levels: female male&
# The class function will also tell you whether your variable is
# a factor, numeric, character, etc.
class(metadata[, "Sex"])
## [1] "factor"&
Formatting+Your+Data+
J#&"()3(&+"&%//3//&(32%+,"#/9,;/&-3+<33#&/%F;23&,#I"(F%+,"#&,#&".(&\![&+%-235&%2;9%&),?3(/,+$&
%#)&-3+%&),?3(/,+$5&<3&#33)&+"&F%+A9&+93&"()3(&"I&".(&)%+%&I(%F3/N&S"(&+9%+&<3&<,22&./3&+93&
intersect()&I.#A+,"#N&O3A%./3&<3&9%?3&;"+3#+,%22$&(3F"?3)&"#3&"(&F"(3&/%F;23/&I("F&".(&
\![&+%-23&).(,#H&(%(3I%A+,"#5&I,2+3(,#H&"(&"+93(&F%#,;.2%+,"#/5&<3&A%#&I,(/+&)3I,#3&+93&/.-/3+&"I&
/%F;23/&,#&%22&"I&".(&+%-23/N&
intersect()&A%#&(3+%,#&%22&+93&/%F;23&JG/&+9%+&%(3&,#&+93&\![&+%-23&%#)&%2/"&,#&+93&F3+%)%+%&
I,23N&'3&A%#&+93#&/.-/3+&%22&"I&".(&+%-23/&+"&P33;&U./+&+9"/3&/%F;23/N&
# First, define all the samples in the OTU table.
# Remember, when we load in the OTU table, samples are columns.
# The last column in the OTU table is taxonomy, so omit the last column.
samples1 <- head(colnames(otu), -1)

# Now find the sample IDs shared by every table we are about to subset:
# the OTU table, the metadata (row names), the alpha diversity table
# (row names) and the beta diversity matrix (row names). Including alpha
# and beta here avoids NA rows / "subscript out of bounds" errors below
# if one of those tables is missing a sample.
IDs_Keep <- Reduce(
  intersect,
  list(samples1, rownames(metadata), rownames(alpha), rownames(beta))
)

# Now let's filter the metadata to keep only those samples.
# We do this by telling R to make a new data frame that only has the
# rows we want.
metadata <- metadata[IDs_Keep, ]

# Now let's filter the OTU table to keep just the intersecting samples.
# We will store it as a new otu table (in case we need the old one).
# Remember, OTU table has columns as samples!
# This will also remove the taxonomy, because it's not a sample ID we want.
otu2 <- otu[, IDs_Keep] # for rarefied
otu_low2 <- otu_low[, IDs_Keep] # for low depth removed

# To add the taxonomy back, we can use the taxonomy info from
# the original table
otu2$taxonomy <- otu$taxonomy
otu_low2$taxonomy <- otu_low$taxonomy

# Now let's filter the alpha diversity table to keep those samples too.
# Alpha diversity has the samples as row names.
alpha <- alpha[IDs_Keep, ]

# Now let's filter the beta diversity table to keep those samples too.
# Beta diversity has the samples as row names AND column names,
# so we must filter both the rows and columns.
beta <- beta[IDs_Keep, IDs_Keep]

# Let's check to make sure the samples match (taxonomy column excluded)
as.character(rownames(metadata)) == head(colnames(otu2), -1)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
TRUE&
## [15] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
TRUE&
## [29] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
TRUE&
## [43] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
TRUE&
## [57] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
TRUE&
## [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
TRUE&
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
TRUE&
## [99] TRUE TRUE TRUE TRUE&
# Let's see how many samples are in the filtered otu table (columns)
# and in the mapping file (rows)

ncol(otu2) # There should be one more here because there is also a taxonomy column
## [1] 103
nrow(metadata)
## [1] 102
Plotting+in+R+
ggplot+
!"&?,/.%2,M3&".(&)%+%&,#&@&<3&<,22&./3&+93&;%AP%H3&ggplot2()N&!9,/&;%AP%H3&%22"</&./&+"&F%P3&
)3+%,23)&%#)&/;3A,I,A&?,/.%2,M%+,"#&#33)3)&+"&-3/+&/9"<&".(&(3/.2+/N&>3+`/&/+%(+&<,+9&+93&;%AP%H3/&
<3&#33)&+"&2"%)N&JI&+93/3&%(3&#"+&,#/+%223)&$".&A%#&,#/+%22&+93F&I,(/+&<,+9&install.packages()N&
library(ggplot2)&
## Warning: package 'ggplot2' was built under R version 3.3.2&
$$N-#096,N109
HH;2"+&2,P3/&+"&9%?3&%22&+93&)%+%&I"(&+93&;2"+&,#&"#3&+%-23N&B;3A,I,A%22$5&,+&2,P3&+"&-3&%-23&+"&%AA3//&
+93&,#I"(F%+,"#&#33)3)&-$&./,#H&A"2.F#/N&>3+`/&./3&%2;9%&),?3(/,+$&%/&%#&3D%F;23N&'3&<,22&./3&
".(&%2;9%&),?3(/,+$&F3%/.(3F3#+/&%/&+93&,#;.+&)%+%&I"(&".(&3D%F;23/N&
S,(/+&<3&9%?3&+"&A"F-,#3&".(&%2;9%&),?3(/,+$&(3/.2+/&<,+9&+93&F3+%)%+%&+9%+&+322/&./&<9,A9&-")$&
/,+3&3%A9&/%F;23/&A"F3/&I("FN&3%U)9(1")92#19B%E)92#1"9&)0%.%0%9%,.9%-NB%9.6E)"(60290%<-)(9
-#%.).9%,.90B%090B)9(%&N-)(9%")9(1<()00).9%,.96,90B)94#"")409#".)"9D#"9<#0B90%<-)(O&
# We will make a copy of our metadata to work with
combined_alphadata <- metadata

# Add a column with the Shannon index measurement for each sample.
# We look the values up by sample ID (the row names of both tables), so
# each sample gets its own value even if the two tables are not in the
# same row order.
combined_alphadata$shannon <- alpha[rownames(combined_alphadata), "shannon"]
$$N-#097#"&%09
HH;2"+&A(3%+3/&;2"+/&,#&2%$3(/N&S,(/+&<3&F%P3&+93&-%/3&2%$3(&<,+9&ggplot()&%#)&+93#&%))&"#&
),II3(3#+&+$;3/&"I&;2"++,#H&+$;3/&%#)&%3/+93+,A/N&
G)(0B)06493%NN6,$9
J#&HH;2"+5&%3/+93+,A&F3%#/&(#&)0B6,$92#194%,9())N&S"(&3D%F;230&
u&;"/,+,"#&K,N3N5&"#&+93&D&%#)&$&%D3/L&
u&A"2"(&K€".+/,)3•&A"2"(L&
u&I,22&K€,#/,)3•&A"2"(L&
u&/9%;3&K"I&;",#+/L&
u&2,#3+$;3&
u&/,M3&
g)#&)0649Q<^)40(9T$)#&V9
63"F3+(,A&"-U3A+/&%(3&+93&%A+.%2&A9%(%A+3(/&<3&;.+&"#&%&;2"+N&S"(&3D%F;230&
u&;",#+/&K3NHN&H3"Fg;",#+5&I"(&/A%++3(&;2"+/L&
u&2,#3/&K3NHN&H3"Fg2,#35&I"(&2,#3&H(%;9/L&
u&-"D;2"+&K3NHN&H3"Fg-"D;2"+5&I"(&-"D&;2"+/L&
Plotting+
O3I"(3&<3&;2"+&".(&)%+%5&<3&#33)&+"&+9,#P&%-".+&<9%+&+$;3&,+&,/N&J#&+93&I,(/+&3D%F;23&+93&
,#)3;3#)3#+&?%(,%-23&<,22&-3&iO")$B,+3i5&<9,A9&,/&),/A(3+3N&\.(&)3;3#)3#+&?%(,%-235&%2;9%&
),?3(/,+$&KB9%##"#&J#)3DL5&I%22/&,#&%&(%#H3&"I&?%2.3/N&'3&A".2)&./3&%&-%(&A9%(+5&-.+&%&-"D&;2"+&<,22&
+322&./&F"(3&%-".+&+93&)%+%/3+N&
Y#>9!-#0(9
If we remember, box plots consist of several features: the box, which extends from the first
quartile (Q1) to the third quartile (Q3), with the median (Q2) depicted by a line
within the box; whiskers, which extend from the box and indicate the range of
variability outside of the upper and lower quartiles; and outliers, which are individual points
outside of the box and whiskers.
J+&,/&,F;"(+%#+&+"&#"+3&+9%+&-"D&;2"+/&%(3&H"")&I"(&,#,SN%"%&)0"64&)%+%N&!93$&),/;2%$&?%(,%+,"#&
,#&/%F;23/&"I&%&/+%+,/+,A%2&;";.2%+,"#&<,+9".+&F%P,#H&%#$&%//.F;+,"#/&"I&+93&.#)3(2$,#H&
/+%+,/+,A%2&),/+(,-.+,"#N&!93&/;%A,#H&-3+<33#&+93&,#+3(Z.%(+,23&(%#H3&"I&+93&-"D&,#),A%+3/&+93&
)3H(33&"I&),/;3(/,"#&K/;(3%)L&%#)&/P3<#3//&,#&+93&)%+%N&
F.1""0&;)G=-?D.%)H)*)I0(54%"%)J*3-40-K.%)
# Base layer, then a boxplot layer: one box of Shannon diversity per body site
ggplot() +
  geom_boxplot(data = combined_alphadata, aes(x = BodySite, y = shannon))
&
I#064)M&
• We created the base layer with ggplot()
 o We add the next layer with +
 o We added a boxplot with geom_boxplot()
 o We specified which table to use with data=
 o We specified the aesthetics within aes()
u&!93&)3;3#)3#+&?%(,%-23&K$L&,/&A%223)&-$&,+/&A"2.F#&#%F3&
u&!93&,#)3;3#)3#+&?%(,%-23&KDL&,/&A%223)&-$&,+/&A"2.F#&#%F3&
;4%00)"9!-#0(9
J#&+93&/3A"#)&3D%F;23&+93&,#)3;3#)3#+&?%(,%-23&<,22&-3&i7H3i5&<9,A9&,/&A"#+,#."./N&\.(&
)3;3#)3#+&?%(,%-235&%2;9%&),?3(/,+$&KB9%##"#&J#)3DL5&I%22/&,#&%&(%#H3&"I&?%2.3/N&!93(3I"(3&<3&A%#&
./3&%&/A%++3(&;2"+&+"&2""P&I"(&+(3#)/N&
F.1""0&;)G=-?D.%)L)*)E1&"0&#1#()J*3-40-K.%)
# Base layer, then a point layer: Shannon diversity against Age
ggplot() +
  geom_point(data = combined_alphadata, aes(x = Age, y = shannon))
&
I#064)M&
• We created the base layer with ggplot()
 o We add the next layer with +
 o We added a scatter plot with geom_point()
 o We specified which table to use with data=
 o We specified the aesthetics within aes()
u&!93&)3;3#)3#+&?%(,%-23&K$L&,/&A%223)&-$&,+/&A"2.F#&#%F3&
u&!93&,#)3;3#)3#+&?%(,%-23&KDL&,/&A%223)&-$&,+/&A"2.F#&#%F3&
Q0B)"9!-#006,$9P>%&N-)(9
F.1""0&;)G=-?D.%)M)*)<41&;)F.1")+/D%()
# Deliberately wrong plot type: a boxplot with a continuous x variable (Age).
# ggplot2 responds with the warning shown below.
ggplot() +
  geom_boxplot(data = combined_alphadata, aes(x = Age, y = shannon))
## Warning: Continuous x aesthetic -- did you forget aes(group=...)?
&
I#064)M&
u&'3&+(,3)&+"&F%P3&%&-"D;2"+&<,+9&A"#+,#."./&)%+%N&J+&A23%(2$&)"3/#`+&<"(P&<322m&
F.1""0&;)G=-?D.%)N)*),$$0&;)0&)?14%)-%("@%"05()
# Specifying the color changes the outline color
ggplot() +
  geom_boxplot(data = combined_alphadata,
               aes(x = BodySite, y = shannon, color = BodySite))

# Specifying the fill changes the interior color
ggplot() +
  geom_boxplot(data = combined_alphadata,
               aes(x = BodySite, y = shannon, fill = BodySite))

# Same plot as the previous one: fill is mapped to BodySite inside aes(),
# so each body site gets its own interior color
ggplot() +
  geom_boxplot(data = combined_alphadata,
               aes(x = BodySite, y = shannon, fill = BodySite))

# Specifying the fill outside of aes() changes it for all x values
# You must pick an exact color if you'd like to do this
ggplot() +
  geom_boxplot(data = combined_alphadata, fill = "red",
               aes(x = BodySite, y = shannon))

# Specifying the theme changes the background
ggplot() +
  geom_boxplot(data = combined_alphadata, fill = "red",
               aes(x = BodySite, y = shannon)) +
  theme_bw() # this is the black and white theme

# We can pick any colors we want to fill by
# (one color per body site, in the order of the BodySite levels)
ggplot() +
  geom_boxplot(data = combined_alphadata,
               aes(x = BodySite, y = shannon, fill = BodySite)) +
  theme_bw() +
  scale_fill_manual(values = c("blue", "green", "pink", "grey", "yellow"))
&
F.1""0&;)G=-?D.%)O)*),$$0&;)P-/%4()12)F.1"()
# Add a scatter on top of the boxplots
# To do this, we have to specify the data frame for each layer and the
# aes() for each layer
ggplot() +
  geom_boxplot(data = combined_alphadata,
               aes(x = BodySite, y = shannon, color = BodySite)) +
  geom_jitter(data = combined_alphadata,
              aes(x = BodySite, y = shannon, color = BodySite))

# We can decrease the jitter on the scatter plot with width=
ggplot() +
  geom_boxplot(data = combined_alphadata,
               aes(x = BodySite, y = shannon, color = BodySite)) +
  geom_jitter(data = combined_alphadata, width = 0.1,
              aes(x = BodySite, y = shannon, color = BodySite))

# If all the data for your entire plot will use the same data frame you
# can specify that in the ggplot()
ggplot(data = combined_alphadata) +
  geom_boxplot(aes(x = BodySite, y = shannon, color = BodySite)) +
  geom_jitter(width = 0.1, aes(x = BodySite, y = shannon, color = BodySite))

# If all the data and aes() for your entire plot will be the same, you
# can specify it in ggplot()
ggplot(data = combined_alphadata,
       aes(x = BodySite, y = shannon, color = BodySite)) +
  geom_boxplot() +
  geom_jitter(width = 0.1)

# You can make this specific to just one plot layer
# We are using color ONLY in the geom_jitter here
ggplot(data = combined_alphadata, aes(x = BodySite, y = shannon)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, aes(color = BodySite))
&
Alpha+Diversity+Differences+in+R+
Input+data+
G-NB%9*6E)"(6029=%<-)9%,.93)0%.%0%9
Y".(&%2;9%&),?3(/,+$&+%-23&%#)&F3+%)%+%&+%-23&/9".2)&-3&2"%)3)N&@3F3F-3(&+"&/.-/3+&+93&+%-23/&
/"&+9%+&+93&/%F;23/&JG/&%(3&A"((3A+&%#)&,#&+93&/%F3&"()3(N&J#&+93&i>"%),#H&!%-23/&,#&@i&/3A+,"#&
<3&/%?3)&+93/3&+%-23/&%/&alpha&%#)&metadata&
Q"<&<3&#33)&+"&;,AP&<9,A9&A"?%(,%+3&<3&<".2)&2,P3&+"&./3&I"(&+93&;2"+5&%#)&<9,A9&%2;9%&
),?3(/,+$&F3+(,A&<3&<".2)&2,P3&+"&?,/.%2,M3N&'3&<,22&./3&iB3Di&%#)&+93&i/9%##"#i&),?3(/,+$&
F3+(,AN&
Testing+for+Differences+
"-Tests+
7&t8+3/+&A%#&-3&./3)&+"&)3+3(F,#3&,I&+<"&/3+/&"I&)%+%&%(3&/,H#,I,A%#+2$&),II3(3#+&I("F&3%A9&"+93(&
-%/3)&"#&+93&;";.2%+,"#&F3%#/N&J+&%//.F3/&+93&)%+%&%(3&#"(F%22$&),/+(,-.+3)N&72+9".H9&,+&,/&
+$;,A%22$&%//.F3)&+9%+&)%+%&"I&%&2%(H3&3#".H9&/%F;23&/,M3&%(3&#"(F%22$&),/+(,-.+3)5&+9,/&,/&#"+&
%2<%$/&+93&A%/3N&
Testing+for+Normality+
>3+`/&/33&,I&".(&)%+%&%(3&#"(F%22$&),/+(,-.+3)&./,#H&+93&9,/+KL&I.#A+,"#N&'3&<,22&)"&+9,/&I"(&3%A9&
H(".;&,#&+93&A"?%(,%+3&<3&%(3&+3/+,#H&K,#&+9,/&A%/35&`/3D`LN&
# We will find the samples that are male in the metadata
# (a TRUE/FALSE value per sample, in metadata row order)
males.ix <- metadata$Sex == "male"
# And subset the alpha table to include only those, and store it as 'males'
males <- alpha[males.ix, ]

# We will do the same for females
females.ix <- metadata$Sex == "female"
females <- alpha[females.ix, ]

# Now we can plot the histograms
hist(females$shannon, xlab = "Alpha Diversity", main = "Females")

hist(males$shannon, xlab = "Alpha Diversity", main = "Males")
&
G"&+9"/3&2""P&#"(F%22$&),/+(,-.+3)~&S"(&+93&F"/+&;%(+&+93$&)"5&-.+&,+&,/&/"F3+,F3/&9%()&+"&+322N&
!93&-3/+&<%$&+"&)3+3(F,#3&,I&$".(&)%+%&%(3&#"(F%22$&),/+(,-.+3)&,/&+"&)"&%&/+%+,/+,A%2&+3/+N&
=B)9;B%N6"#S56-U9I#"&%-6029=)(09
!9,/&+3/+&A%#&-3&(.#&<,+9&+93&shapiro.test()&I.#A+,"#&,#&@N&J+&<,22&H3#3(%+3&%#&%;;("D,F%+3&;8
?%2.35&<9,A9&,/&%)3Z.%+3&,#&%//3//,#H&#"(F%2,+$N&J#&+9,/&A%/35&NSE%-1)(9-)((90B%,9aOW96,.64%0)90B)9
.%0%9%")9(6$,6D64%,0-29.6DD)"),09D"#&9,#"&%-9.6(0"6<106#,N&'3&<,22&(.#&+9,/&+3/+&I"(&3%A9&H(".;&,#&
".(&A"?%(,%+3&"I&,#+3(3/+N&
# Shapiro-Wilk normality test on the Shannon diversity of each group
shapiro.test(females$shannon)
##
## Shapiro-Wilk normality test
##
## data: females$shannon
## W = 0.94434, p-value = 0.01548
shapiro.test(males$shannon)
##
## Shapiro-Wilk normality test
##
## data: males$shannon
## W = 0.89861, p-value = 0.0004951
O%/3)&"#&+93&(3/.2+/5&/9".2)&$".&(.#&%&t8+3/+~&
Mann-Whitney+U+Test+
Based on these results, it is better to test for differences in our data using a statistical test that
does not require normal distributions. The Mann-Whitney U test (aka Wilcoxon rank-sum test)
is similar to the t-test. The null hypothesis of this test would be that the two groups differ
only by chance. We can do a Mann-Whitney U test using the wilcox.test() function in R.
wilcox.test(females$shannon, males$shannon)
##
## Wilcoxon rank sum test with continuity correction
##
## data: females$shannon and males$shannon
## W = 973, p-value = 0.0295
## alternative hypothesis: true location shift is not equal to 0
# This is telling R to use a Wilcoxon test to compare females
# and males for the diversity metric we set earlier.
# wilcox.test() has no na.rm argument (one passed in is silently ignored);
# it drops any NAs (missing data) from each group on its own.

# Note that you would run a t-test in the same way, only using
# the t.test() function
The p-value here (0.0295) is less than an alpha of 0.05. This means that we reject the null
hypothesis that the two sexes are not significantly different for this metric of alpha
diversity.
'3&A%#&%2/"&/33&<9%+&+93&-"D&;2"+&2""P/&2,P3N&
# Copy the alpha diversity table and add each sample's Sex from the metadata
alpha2 <- alpha
alpha2$Sex <- metadata$Sex

ggplot(data = alpha2, aes(x = Sex, y = shannon)) +
  # hide the boxplot's own outlier points because the jitter layer
  # draws every point anyway
  geom_boxplot(outlier.color = NA) +
  geom_jitter(width = 0.1, aes(color = Sex)) +
  theme_bw() +
  # no color legend: the x-axis is already labeled.
  # "none" replaces the deprecated guides(color = FALSE) form.
  guides(color = "none")
&
J#&".(&3D%F;23&<3&"#2$&9%?3&+<"&H(".;/5&`F%23`&%#)&`I3F%23`N&JI&<3&./3)&%&),II3(3#+&A"?%(,%+35&
2,P3&`O")$B,+3`&K<9,A9&A"#+%,#/&b&H(".;/L&<3&<".2)&9%?3&dc&+3/+/&+"&)"&;%,(<,/3N&>.AP,2$5&I"(&
2"";/&A%#&A"F3&+"&+93&(3/A.3m&!9,/&2"";&<,22&;3(I"(F&+93&wilcox.test()&"#&3?3($&.#,Z.3&
A"F-,#%+,"#&"I&H(".;/&,#&+93&A"?%(,%+3N&
# First let's set all the groups available for the variable we care about.
# In this case we will use BodySite (Sex only has two values, so it only
# needs a single test).
groups <- unique(metadata$BodySite)

# Number of groups, and number of unique pairs of groups to test
n.groups <- length(groups)
n.tests <- choose(n.groups, 2)

# We create vectors of the right length up front to store the pair-wise
# pvalues and the groups tested (names); this avoids re-growing them on
# every pass through the loop
pw.pvalues <- numeric(n.tests)
pw.names <- character(n.tests)

# We set two counters. 'i' starts at 1 and goes until one less than
# the number of groups. 'j' starts at i + 1 and goes until the full
# number of groups. This ends up comparing every unique pair once:
# 1 vs 2, 1 vs 3, ..., 2 vs 3, 2 vs 4, and so on.
# seq_len() makes the loop run zero times if there are fewer than 2 groups.
k <- 0 # position of the current test in the result vectors
for (i in seq_len(max(n.groups - 1, 0))) {
  for (j in (i + 1):n.groups) {
    k <- k + 1
    # we use this to pick the samples in the group assigned to 'i'
    ix.metric.i <- metadata$BodySite == groups[i]
    # and this for 'j'
    ix.metric.j <- metadata$BodySite == groups[j]
    # this stores the pvalue from the test
    pw.pvalues[k] <- wilcox.test(alpha[ix.metric.i, "shannon"],
                                 alpha[ix.metric.j, "shannon"])$p.value
    # stores the names of the groups tested
    pw.names[k] <- paste0(groups[i], "_vs_", groups[j])
  }
}
names(pw.pvalues) <- pw.names

pw.pvalues
## Mid_vagina_vs_Left_Retroauricular_crease &
## 6.925356e-02 &
## Mid_vagina_vs_Saliva &
## 7.431382e-12 &
## Mid_vagina_vs_Subgingival_plaque &
## 3.715691e-12 &
## Mid_vagina_vs_Stool &
## 2.600984e-11 &
## Left_Retroauricular_crease_vs_Saliva &
## 3.482133e-10 &
## Left_Retroauricular_crease_vs_Subgingival_plaque &
## 1.066403e-10 &
## Left_Retroauricular_crease_vs_Stool &
## 2.117572e-09 &
## Saliva_vs_Subgingival_plaque &
## 5.100514e-01 &
## Saliva_vs_Stool &
## 4.815795e-02 &
## Subgingival_plaque_vs_Stool &
## 2.298969e-01&
False Discovery Rate Correction
When we use the 'sex' covariate, we only have one test to perform. If we are comparing more
than two groups and we are running multiple tests, we have to correct for the number of
comparisons we are making. We do this with the p.adjust() function. This will correct for
type I errors, which are rejections of a true null hypothesis (also known as a false positive).
# Adjust the pairwise p-values with the 'fdr' (false discovery rate)
# method, then show the adjusted values
fdr.pvalues <- p.adjust(pw.pvalues, method = "fdr")
print(fdr.pvalues)
## Mid_vagina_vs_Left_Retroauricular_crease &
## 8.656695e-02 &
## Mid_vagina_vs_Saliva &
## 3.715691e-11 &
## Mid_vagina_vs_Subgingival_plaque &
## 3.715691e-11 &
## Mid_vagina_vs_Stool &
## 8.669946e-11 &
## Left_Retroauricular_crease_vs_Saliva &
## 6.964267e-10 &
## Left_Retroauricular_crease_vs_Subgingival_plaque &
## 2.666008e-10 &
## Left_Retroauricular_crease_vs_Stool &
## 3.529287e-09 &
## Saliva_vs_Subgingival_plaque &
## 5.100514e-01 &
## Saliva_vs_Stool &
## 6.879708e-02 &
## Subgingival_plaque_vs_Stool &
## 2.554410e-01&
Now we can view the relative p-values for each pairwise comparison, and we can save this table
as a file.
# sink("file") sends everything printed after it into that file.
# Calling sink() again with no arguments stops that and closes the file.
sink("alpha_stats.txt")

# Number of samples at each body site
cat("\nNumber of samples in each group:\n")
print(table(metadata$BodySite))

# tapply() splits the Shannon values into body site groups and runs the
# function given as FUN on each group. First the mean ...
cat("\nMean Alpha Diversity:\n")
print(tapply(X = alpha$shannon, INDEX = metadata$BodySite, FUN = mean))

# ... then the median ...
cat("\nMedian Alpha Diversity:\n")
print(tapply(X = alpha$shannon, INDEX = metadata$BodySite, FUN = median))

# ... and the standard deviation of alpha diversity at each body site
cat("\nStandard Deviation:\n")
print(tapply(X = alpha$shannon, INDEX = metadata$BodySite, FUN = sd))

# The raw pairwise p-values
cat("\nPairwise Mann-Whitney-Wilcoxon Tests were performed.\n")
cat("Pairwise p-values are:\n")
print(pw.pvalues)

# The same p-values after the false discovery rate correction
cat("\nFDR-corrected pairwise p-values are:\n")
print(p.adjust(pw.pvalues, method = "fdr"))

sink()
'3&A%#&%2/"&/33&<9%+&+93&-"D&;2"+&2""P/&2,P3N&
# Start again from the alpha diversity table and attach each sample's
# body site so both can be plotted from one data frame
alpha2 <- alpha
alpha2[["BodySite"]] <- metadata[["BodySite"]]

# One box per body site, with the individual samples jittered on top
# and colored by body site
ggplot(alpha2, aes(x = BodySite, y = shannon)) +
  geom_boxplot() +
  geom_jitter(aes(color = BodySite), width = 0.1) +
  theme_bw()
&
'3&A%#&%2/"&;(,#+&+9,/&;2"+&+"&%&;)I&<,+9&+93&pdf()&I.#A+,"#&I"22"<3)&-$&dev.off()&+"&A2"/3&+93&
;)IN&
# Build the plot and store it instead of drawing it right away
plot_output <- ggplot(data = alpha2, aes(x = BodySite, y = shannon)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, aes(color = BodySite)) +
  theme_bw() +
  # Short axis labels. Naming each label by the body site it replaces
  # keeps the labels correct even if the order of the sites changes.
  scale_x_discrete(labels = c(
    Left_Retroauricular_crease = "ear fold",
    Mid_vagina = "vagina",
    Saliva = "saliva",
    Stool = "stool",
    Subgingival_plaque = "plaque"
  )) +
  # Drop the color legend because the sites are labeled at the x-axis
  guides(color = "none")

# pdf() opens the file, print() draws the stored plot into it, and
# dev.off() closes the file
pdf("Alpha_Diversity.pdf", height = 4, width = 6)
print(plot_output)
dev.off()
## quartz_off_screen &
## 2&
Taxa+Summary+Plots+in+R+
a,/.%2,M,#H&<9,A9&+%D%&%(3&,#&$".(&/%F;23/&A%#&-3&%#&3II3A+,?3&<%$&+"&/33&;%++3(#/&,#&+93&)%+%N&
=3(3&<3&<,22&23%(#&9"<&+"&F%P3&+%D%&/.FF%($&;2"+/&-%/3)&"#&$".(&,#;.+&\![&+%-235&%&A"?%(,%+3&
"I&,#+3(3/+5&%#)&"+93(&/;3A,I,3)&;%(%F3+3(/N&
Input+data+
Q='90%<-)9%,.93)0%.%0%9
Y".(&\![&+%-23&/9".2)&-3&2"%)3)&%#)&<3&A%#&./3&+93&(%(3I,3)&?3(/,"#N&Y".(&F3+%)%+%&I,23&
/9".2)&%2/"&-3&2"%)3)N&@3F3F-3(&+"&/.-/3+&+93&+%-23/&/"&+9%+&+93&/%F;23/&JG/&%(3&A"((3A+&%#)&
,#&+93&/%F3&"()3(N&J#&+93&i>"%),#H&!%-23/&,#&@i&/3A+,"#&<3&/%?3)&+93/3&+%-23/&%/&otu2&%#)&
metadata&
Manage+Taxonomy+
'3&<%#+&+"&/3+&%&+%D%&23?32&-$&#.F-3(0&d&k&P,#H)"F&
l&k&;9$2.F&
V&k&A2%//&
]&k&"()3(&
b&k&I%F,2$&
f&k&H3#./&
x&k&/;3A,3/&
>3+`/&<"(P&<,+9&;9$2.F&K23?32&lLN&!93&+%D%&%(3&2,/+3)&<,+9&%&23++3(&(3;(3/3#+,#H&+93&23?32&I"22"<3)&
-$&+<"&.#)3(/A"(3/5&%#)&%&/3F,A"2"#&/3;%(%+,#H&3%A9&23?32&KPggP,#H)"Fv&;gg;9$2.Fv&NNNLN&'3&
<,22&)"&/"F3&/+(,#H&;%(/,#H&+"&(3;2%A3&+93&I.22&+%D"#"F$&2%-32&<,+9&+93&%;;(";(,%+3&23?32N&
# In this example we are using 2 (or phylum).
# This can be for any level you want.
level <- 2

# We will store our taxonomy as a vector of names
otu_names <- as.character(otu2$taxonomy)

# strsplit() splits every name at "; ", which gives one character vector
# of levels per OTU (kingdom, phylum, ...)
taxa_levels <- strsplit(otu_names, "; ", fixed = TRUE)

# For each OTU, head() keeps the first `level` levels and paste()
# collapses them back into one string separated by ";".
# A name with fewer levels than `level` is kept as it is rather than
# stopping with an error.
otu_names <- vapply(
  taxa_levels,
  function(x) paste(head(x, n = level), collapse = ";"),
  character(1)
)

# Replace the old taxonomy with the truncated version
otu2$taxonomy <- otu_names
Q"<&<3&<%#+&+"&A"#/"2,)%+3&".(&\![&+%-23&-$&+93&+%D%&23?32/&<3`?3&/3+5&U./+&2,P3&<3&23%(#3)&,#&
+93&2"%),#H&%#)&F%#,;.2%+,#H&+.+"(,%2N&'3&<,22&./3&+93&aggregate()&I.#A+,"#N&
# Count the sample columns: every column except the last one, which
# holds the taxonomy
sample_no <- ncol(otu2) - 1

# Add up the counts of all rows that share a taxonomy label.
# Naming the list element "taxonomy" makes aggregate() give that name to
# the grouping column, which it places first in the result.
otu3 <- aggregate(
  otu2[, seq_len(sample_no)],
  by = list(taxonomy = otu2$taxonomy),
  FUN = sum
)

# The number of rows left is the number of distinct taxa at the chosen
# level (phyla when level = 2)
nrow(otu3)
## [1] 17&
>3+`/&(3;2%A3&+93&("<#%F3/&<,+9&+93&+%D"#"F$5&%#)&H3+&(,)&"I&+93&+%D"#"F$&A"2.F#N&
# Use the taxonomy labels as the row names
row.names(otu3) <- otu3[["taxonomy"]]

# Keep every column whose header is not "taxonomy"
otu3 <- otu3[, names(otu3) != "taxonomy"]
Filtering+OTUs+and+Samples+
Let's filter the OTU table to keep only OTUs that are in at least 5 people, and that have at least
100 counts.
# Set the cut off: the minimum number of samples a taxon must be found in
nsamples <- 5

# `otu3 > 0` tells R to take all values and see if they are greater
# than 0. If so it will store TRUE, if not it will store FALSE.
# Then we take the `rowSums()` of that, where TRUE=1 and FALSE=0, which
# counts the samples each taxon is present in. Then we ask if that count
# is at least the cut off (>=, so a taxon found in exactly 5 samples is
# kept). It will store TRUE/FALSE values for each row.
cutoff_nsamples <- rowSums(otu3 > 0) >= nsamples
# Keep only taxa that are 'TRUE' (meet the cutoff value)
otu3 <- otu3[cutoff_nsamples, ]

ncounts <- 99
# This cutoff is different than the previous. We care about how MANY
# counts each taxon has. We only want to keep those with a minimum
# of 100 counts across all samples (greater than 99)
cutoff_ncounts <- rowSums(otu3) > ncounts
# Keep only taxa that meet the cutoff
otu3 <- otu3[cutoff_ncounts, ]
Calculating+relative+abundances+
'3&+""P&".+&/"F3&+%D%&<93#&I,2+3(,#H5&/"&<3&#33)&+"&A"#?3(+&A".#+&,#+"&+93&(32%+,?3&%-.#)%#A3&
"I&+9%+&/%F;23N&!"&)"&+9,/5&<3&<,22&./3&%&I"(&2"";&
# Every column is a sample now (taxonomy was already taken out), so
# divide each column by its own total to turn counts into proportions.
# Assigning into otu3[] keeps the table's row and column names.
otu3[] <- lapply(otu3, function(counts) counts / sum(counts))
J#&"()3(&+"&F%P3&".(&\![&+%-23&%#)&(3/.2+/&F"(3&3%/,2$&A"F;%+,-23&<,+9&".(&F3+%)%+%5&<3&<%#+&
+93&/%F;23&JG/&%/&+93&("</&%#)&+93&+%D%&%/&+93&A"2.F#/N&'3`22&./3&+93&I.#A+,"#&t()&+"&+(%#/;"/3&
+93&)%+%&I(%F3N&'3&A%#&+93#&F%P3&%&A"2.F#&B%F;23JG&+9%+&<,22&-3&./3I.2&2%+3(N&
# Flip the table so samples are rows and taxa are columns, then turn
# the result back into a data frame
transposed <- t(otu3)
otu3 <- data.frame(transposed)

# The sample IDs are the row names; copy them into their own column
otu3[["SampleID"]] <- row.names(otu3)

# Keep a copy of this filtered table to come back to later
otu_backup <- otu3
JI&$".&(3F3F-3(5&ggplot&2,P3/&+"&9%?3&%22&+93&)%+%&,#&"#3&+%-23N&Q"<&<3&<,22&./3&%&I.#A+,"#&
A%223)&melt()&I("F&+93&2,-(%($&reshape2&+"&A"#?3(+&".(&)%+%&I(%F3&,#+"&+9(33&A"2.F#/0&"#3&
+9%+&9%/&+93&/%F;23&JG5&"#3&+9%+&9%/&+%D%&JG/5&%#)&"#3&+9%+&9%/&+93&(32%+,?3&%-.#)%#A3/&"I&+93&
+%D%&,#&".(&/%F;23N&'3`22&%2/"&./3&+93&;%AP%H3&plyr&I"(&,+`/&I.#A+,"#&ddply()&+"&%HH(3H%+3&".(&
)%+%&#,A32$N&
O3I"(3&<3&A%#&F"?3&I"(<%()5&$".&F./+&H3+&+93&;%AP%H3/&#33)3)&+"&(.#&+93&I.#A+,"#/&<3&<,22&
./3N&Y".&A%#&,#/+%22&;%AP%H3/&<,+9&+93&install.packages()&I.#A+,"#N&
# Load the packages (install them first with install.packages() if you
# don't already have them)
library(reshape2)
library(plyr)

# melt() turns the wide table into a long one: SampleID stays as it is,
# the old column headers go into "Taxa" and the values go into
# "RelativeAbundance"
otu3 <- melt(
  otu3,
  id.vars = "SampleID",
  variable.name = "Taxa",
  value.name = "RelativeAbundance"
)
Plotting+
Q"<&<3&9%?3&%&I,2+3(3)&+%-23&<,+9&+9(33&A"2.F#/&<,+9&<9,A9&<3&A%#&F%P3&%&-%/,A&+%D%&/.FF%($&
;2"+&KU./+&H(".;3)&-$&/%F;23&JGLN&'3`22&./3&ggplotN&
library(ggplot2)
# This will make a plot with the melted OTU table (otu3), using the
# column headers specified
ggplot(otu3, aes(x = SampleID, y = RelativeAbundance, fill = Taxa)) +
  # geom_bar() makes it a bar plot
  geom_bar(stat = "identity", position = "fill") +
  # This takes off the x-labels (too hard to read)
  scale_x_discrete(labels = NULL)
&
!9,/&;2"+&,/&P,#)&"I&F3//$m&!93(3&%(3&/"&F%#$&/%F;23/&$".&A%#`+&3%/,2$&/33&"#3&/%F;23&I("F&
%#"+93(N&
Adding+Metadata+
>3+`/&+($&;,2"+,#H&-$&%&A"?%(,%+3N&!93&3%/,3/+&<%$&+"&)"&+9%+&,/&+"&/,F;2$&%))&".(&F3+%)%+%&?%2.3/&
+"&".(&+%-23N&4"(3&A"2.F#/&F3%#/&F"(3&;"+3#+,%2&?%(,%-23/&+"&;2"+&-$N&>3+`/&H"&-%AP&+"&".(&I.225&
I,2+3(3)&\![&+%-235&F32+&,+&%#)&+93#&%))&F3+%)%+%N&
# Go back to the saved wide table and melt it again into the three
# columns SampleID, Taxa and RelativeAbundance
otu3 <- melt(
  otu_backup,
  id.vars = "SampleID",
  variable.name = "Taxa",
  value.name = "RelativeAbundance"
)
Q"<&<3&A%#&%))&,#&".(&F3+%)%+%5&./,#H&+93&I.#A+,"#&merge()N&S,(/+&23+`/&F%P3&/.(3&<3&9%?3&+93&
A"?%(,%+3/R93%)3(&#%F3/&<3&+9,#P&<3&)"5&%#)&<3&A%#&(3#%F3&%#$&+9%+&%(3#`+&(,H9+5&%#)&"#2$&
P33;&+93&"#3/&<3`(3&,#+3(3/+3)&,#N&JI&$".`(3&2""P,#H&%+&(3%2&F3+%)%+%5&$".`22&9%?3&%&F.A9&2"#H3(&
2,/+&+9%#&+93&+.+"(,%2&I,23/N&
colnames(metadata)&
## [1] "BarcodeSequence" "LinkerPrimerSequence" "Sex" &
## [4] "BodySite" "SRS_SampleID" "FASTA_FILE" &
## [7] "Description" "Age"&
# Of the metadata columns we keep only "Sex", "BodySite" and
# "Description" (the area of the body)
columns_keep <- c("Sex", "BodySite", "Description")
metadata2 <- metadata[columns_keep]

# To merge covariates to sample IDs, the metadata needs a SampleID
# column too; its sample IDs are the row names
metadata2[["SampleID"]] <- row.names(metadata2)

# merge() matches the rows of the two tables by SampleID. This will
# drop any samples in the mapping file that aren't in the OTU table.
otu3 <- merge(x = otu3, y = metadata2, by = "SampleID")
Q"<&<3&A%#&;2"+&%AA"(),#H&+"&-")$&/,+3N&
ggplot(otu3, aes(x = BodySite, y = RelativeAbundance, fill = Taxa)) +
  # using position="fill" makes sure it sums to 1
  geom_bar(stat = "identity", position = "fill")

# We will want to shorten the x-labels.
# We can even split our data up by sex using this method,
# using an option called facet_grid():
ggplot(otu3, aes(x = BodySite, y = RelativeAbundance, fill = Taxa)) +
  geom_bar(stat = "identity", position = "fill") +
  # This will separate by sex
  facet_grid(. ~ Sex) +
  # This relabels the x axis. Naming each label by the body site it
  # replaces keeps the labels matched to the right bars.
  scale_x_discrete(labels = c(
    Left_Retroauricular_crease = "LRC",
    Mid_vagina = "MV",
    Saliva = "Sal.",
    Stool = "Stool",
    Subgingival_plaque = "SGP"
  ))
&
Plot+Specific+Taxa+
We can also plot just specific taxa. For that, we can use the aggregated relative abundance
table otu3 (with the metadata merged in, as in the example above), and pull out a subset of the taxa we're specifically
interested in. You'll need the exact taxa labels from the table to match. Say we want to look at
Bacteroidetes and Actinobacteria:
# If we don't remember the spelling, we can print all the taxa and &
# copy and paste: &
unique(otu3$Taxa)&
## [1] k__Bacteria.p__Cyanobacteria k__Bacteria.p__Tenericutes &
## [3] k__Bacteria.p__Fusobacteria k__Bacteria.p__Bacteroidetes &
## [5] k__Bacteria.p__Verrucomicrobia k__Bacteria.p__TM7 &
## [7] k__Bacteria.p__Actinobacteria k__Bacteria.p__Spirochaetes &
## [9] k__Bacteria.p__Proteobacteria k__Bacteria.p__Firmicutes &
## 10 Levels: k__Bacteria.p__Actinobacteria ...
k__Bacteria.p__Verrucomicrobia&
# The two phyla we want to look at: Bacteroidetes and Actinobacteria
# (spelled exactly as they appear in the Taxa column)
taxaList <- c(
  "k__Bacteria.p__Bacteroidetes",
  "k__Bacteria.p__Actinobacteria"
)

# Keep only the rows of the table whose Taxa value is in that list
filtered <- otu3[otu3$Taxa %in% taxaList, ]
We plot things the same, making sure not to use the option position="fill", since our
abundances now should not add up to 1. Let's make our labels a little nicer, as well.
# Stacked bars of the two selected phyla for each sex, with nicer
# labels for the axes and the legend
ggplot(filtered, aes(x = Sex, y = RelativeAbundance, fill = Taxa)) +
  geom_bar(stat = "identity") +
  scale_x_discrete(labels = c("Female", "Male")) +
  scale_fill_discrete(labels = c("Actinobacteria", "Bacteroidetes")) +
  labs(y = "Relative Abundance")
&
!93(3&%(3&%2F"/+&.#2,F,+3)&;%(%F3+3(/&+9%+&$".&A%#&;2%$&<,+9&+"&A9%#H3&+93&%A+.%2&2""P&"I&$".(&
;2"+/N&O32"<&<3&./3&theme_bw()&+"&F%P3&+93&-%APH(".#)&<9,+35&%#)&F"),I,3)&+93&A"2"(/&-$&
F%P,#H&%&A"2"(&?%(,%-23&cols&+9%+&<3&./3&+"&A"2"(&+93&),II3(3#+&+%D%&<,+9&
scale_fill_manual()N&
# The fill colors to use, one per taxon
cols <- c("purple", "yellow")

# To choose our own colors we need scale_fill_manual() rather than
# scale_fill_discrete(); theme_bw() makes the background white
ggplot(filtered, aes(x = Sex, y = RelativeAbundance, fill = Taxa)) +
  geom_bar(stat = "identity") +
  theme_bw() +
  scale_fill_manual(
    values = cols,
    labels = c("Actinobacteria", "Bacteroidetes")
  ) +
  scale_x_discrete(labels = c("Female", "Male")) +
  labs(y = "Relative Abundance")
&
Differentiated+OTUs+in+R+
'3&A%#&+3/+&I"(&+%D%&"(&\![&+9%+&%(3&),II3(3#+,%22$&%-.#)%#+&%A("//&/%F;23&+$;3/N&!"&)"&+9,/5&<3&
#33)&+"&I,(/+&+(%#/I"(F&".(&)%+%&".+&"I&+93&/,F;23DN&!9,/&F3%#/&<3&<%#+&+"&H"&I("F&<"(P,#H&
<,+9&A"F;"/,+,"#%2&)%+%&+"&#"#8A"F;"/,+,"#%2&)%+%N&
Inputs+
!93&,#;.+&)%+%&$".&#33)&,#A2.)3&+93&F3+%)%+%&%#)&$".(&\![&+%-23&+9%+&9%/&2"<&)3;+9&/%F;23/&
(3F"?3)N&G"#`+&./3&+93&(%(3I,3)&\![&+%-23N&!93&+%-23/&/9".2)&-3&/.-/3++3)&%#)&"()3(3)&I"(&
/%F;23&JGN&
S,(/+&<3&<,22&+%P3&+93&+%D"#"F$&".+&"I&+93&\![&+%-235&I,2+3(&2"<&%-.#)%#+&\![/&%#)&2"<&
"AA.((,#H&\![/0&
# Store the taxonomy column on its own (the row names tell us which OTU
# each entry belongs to) so we can look names up later.
# drop = FALSE keeps it as a one-column table instead of a plain vector.
taxonomy_table <- otu_low2[, "taxonomy", drop = FALSE]

# Keep only the sample columns: everything except "taxonomy"
otu_low3 <- otu_low2[, names(otu_low2) != "taxonomy"]

# Filter low-abundance values: any count below one millionth of the
# total number of reads in the table is set to 0
otu_low3[otu_low3 < sum(colSums(otu_low3)) / 1000000] <- 0

# Set singletons (counts below 2) to 0 (needed for low depth OTU tables)
otu_low3[otu_low3 < 2] <- 0

# Keep the OTUs that are present (count above 0) in more than 5% of
# the samples
otu_low3 <- otu_low3[rowSums(otu_low3 > 0) > (0.05 * ncol(otu_low3)), ]
Q"<&<3&<,22&+(%#/I"(F&+93&)%+%&./,#H&%&A3#+3(3)&2"H8(%+,"&+(%#/I"(F%+,"#N&!9,/&#33)/&+93&
robCompositions&;%AP%H3N&
library(robCompositions)&
## Warning: package 'robCompositions' was built under R version 3.3.2&
## Loading required package: robustbase&
## Warning: package 'robustbase' was built under R version 3.3.2&
## Loading required package: data.table&
## &
## Attaching package: 'data.table'&
## The following objects are masked from 'package:reshape2':&
## &
## dcast, melt&
## Loading required package: e1071&
## Warning: package 'e1071' was built under R version 3.3.2&
## Loading required package: pls&
## Warning: package 'pls' was built under R version 3.3.2&
## &
## Attaching package: 'pls'&
## The following object is masked from 'package:stats':&
## &
## loadings&
## sROC 0.1-2 loaded&
## &
## Attaching package: 'robCompositions'&
## The following object is masked from 'package:robustbase':&
## &
## alcohol&
# Convert any 0 to 0.65 to allow for the CLR transform
# Ref: Palarea-Albaladejo J, et al. 2014. JOURNAL OF CHEMOMETRICS.
# A bootstrap estimation scheme for chemical compositional data with
# nondetects. 28;7:585-599.
otu_low3[otu_low3 == 0] <- 0.65

# Centered log-ratio transform for compositions
# Ref: Gloor GB, et al. 2016. ANNALS OF EPIDEMIOLOGY. It's all relative:
# analyzing microbiome data as compositions. 26;5:322-329.

# Convert to samples as rows
otu_table <- t(otu_low3)

# Centered log-ratio transform the data
otu_table <- cenLR(otu_table)$x.clr
Test+For+Differences+
Now our OTU table has samples as rows and OTUs as columns. We can now loop through the
OTUs and test for differences according to our metadata. Let's test for differences by body site.
Because we transformed our data, we can now use parametric tests to look for differentiated
OTUs. We can use ANOVA or a t-test depending on the number of groups to test.
# We start with the first OTU, which is the first column of the OTU
# table. Its column name is the OTU id, and we use that id to pick the
# matching row of taxonomy_table to find out what the OTU is called.
this_taxa <- taxonomy_table[colnames(otu_table)[[1]], "taxonomy"]
print(this_taxa)
## [1] "k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales;
f__Lactobacillaceae; g__Lactobacillus; s__"&
# Now let's run the test using the first column of the OTU table as the
# values and the body sites in the metadata as the groups
aov_test <- aov(otu_table[, 1] ~ metadata$BodySite)
summary(aov_test)
## Df Sum Sq Mean Sq F value Pr(>F) &
## metadata$BodySite 4 47.77 11.942 27.95 1.91e-15 ***&
## Residuals 97 41.44 0.427 &
## ---&
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1&
The output from aov() is more complicated than kruskal.test(). aov() output is a list that
includes information about the degrees of freedom (Df), the Sum of Squares (Sum Sq), the
Mean Square (Mean Sq), the F statistic/ratio (F value), and the p-value (Pr(>F)). For now, we
are interested in the p-value, which can be indexed from summary(aov_test) with
summary(aov_test)[[1]][1,5].
summary(aov_test)[[1]][1,5]&
## [1] 1.913061e-15&
>3+`/&;2"+&+9,/&3D%F;23&%#)&/33&<9%+&,+&2""P/&2,P3N&
# Because ggplot likes to have all the data in one table, let's make a
# new table to plot with
plot_table <- data.frame(otu_table)
# Note that this will store an X in front of all the numerical column names
plot_table$BodySite <- metadata$BodySite

# Store which column (header) you want to plot
this_otu <- colnames(plot_table)[1]

# We can also store its name.
# strsplit() splits the taxonomy at each ";", tail() takes the last two
# values (genus and species) to shorten the name, and trimws() removes
# the space left in front of each one
name <- strsplit(this_taxa, ";", fixed = TRUE)[[1]]
names_tail <- trimws(tail(name, n = 2))

# This will plot the transformed abundances for each body site
# Note that we have to use aes_string() because we are filling in the y
# column header with a string
ggplot(plot_table) +
  geom_boxplot(aes_string(x = "BodySite", y = this_otu, fill = "BodySite")) +
  scale_fill_manual(values = c("tomato", "darkorchid4",
                               "gold", "tan4", "dodgerblue")) +
  scale_x_discrete(labels = c("ear fold", "vagina", "saliva", "stool",
                              "plaque")) +
  # The axis title has to be one string, so we paste genus and species
  # together
  labs(y = paste(names_tail, collapse = " "))
&
Test+All+Taxa+
Using for-loops we can apply this test to all of the OTUs in our table. In the loop, the
transformed abundance of each OTU for all samples (one column of plot_table) is used as the
dependent variable. The first line in the loop calls the aov() function, and the last line assigns
each p-value to a pvals vector.
# Every column except the last one (body site) is an OTU
n_otus <- ncol(plot_table) - 1

# The first step is to make a vector that will store our p-values,
# with one slot per OTU
pvals <- numeric(n_otus)

# Loop through each OTU column. seq_len() gives an empty loop if there
# are no OTU columns.
for (i in seq_len(n_otus)) {
  aov_out <- aov(plot_table[, i] ~ plot_table$BodySite)
  # row 1, column 5 of the summary table is the p-value (Pr(>F))
  pvals[i] <- summary(aov_out)[[1]][1, 5]
}
Find+Significant+p-Values+
Let's see how many p-values are significant for each covariate. We will use an alpha of 0.05.
sum(pvals < 0.05)&
## [1] 579&
False+Discovery+Rate+
Because we did so many statistical comparisons, we need to correct for type I errors (rejection
of a true null hypothesis, also known as a false positive). Controlling the false discovery rate
helps to control the expected proportion of false positives. To do this we use the p.adjust()
function with the 'fdr' parameter.
# False discovery rate correction of the per-OTU p-values
pvals.fdr <- p.adjust(pvals, method = "fdr")
>3+`/&/33&9"<&F%#$&;8?%2.3/&%(3&/,H#,I,A%#+&I"(&3%A9&A"?%(,%+3&%I+3(&+93&I%2/3&),/A"?3($&(%+3&
A"((3A+,"#N&
sum(pvals.fdr < 0.05)&
## [1] 577&
Plotting+Significant+OTUs+
If we wanted to plot all the significantly different taxa we could do so with a for loop. We will
plot the first three significantly different taxa across the body sites.
# Index just the first three significantly different OTUs.
# which() tells us the position (column number) of the values that are
# true (< 0.05). head() takes at most the first 3 and, unlike [1:3],
# does not fill in NA when fewer than three are significant.
first_three <- head(which(pvals.fdr < 0.05), n = 3)

# This loops through the significant OTUs, stores their name
# and makes a box plot of the transformed abundances of the taxa.
# We then store the plots in a list.
plot_list <- vector("list", length(first_three))
for (i in seq_along(first_three)) {
  # 'index' is the column number of the OTU; 'i' only counts the plots
  index <- first_three[[i]]
  this_otu <- colnames(plot_table)[index]
  # Look the taxonomy up by OTU id (the column name in otu_table), the
  # same way we did for the first OTU. taxonomy_table was stored before
  # the OTUs were filtered, so its row numbers do not line up with the
  # columns of plot_table.
  this_taxa <- taxonomy_table[colnames(otu_table)[index], "taxonomy"]
  name <- strsplit(as.character(this_taxa), ";", fixed = TRUE)[[1]]
  # levels 6 and 7 are genus and species
  taxon <- paste(name[6], name[7], sep = " ")
  # Note that we have to use aes_string() because we are filling in the
  # y column header with a string
  plot_out <- ggplot(plot_table) +
    geom_boxplot(aes_string(x = "BodySite", y = this_otu,
                            fill = "BodySite")) +
    scale_fill_manual(values = c("tomato", "darkorchid4",
                                 "gold", "tan4", "dodgerblue")) +
    scale_x_discrete(labels = c("ear fold", "vagina", "saliva", "stool",
                                "plaque")) +
    labs(y = taxon)
  plot_list[[i]] <- plot_out
}

# Now let's print the plots to a pdf
# each plot will be a new page in the pdf
pdf("Diff_taxa.pdf", height = 4, width = 6)
for (i in seq_along(plot_list)) {
  print(plot_list[[i]])
}
dev.off()
## quartz_off_screen &
## 2&
PCoA+in+R+
'3&./3&hJJ41&+"&A%2A.2%+3&".(&),/+%#A3&F%+(,A3/&./,#H&beta_diversity.py&"(&
beta_diversity_through_plots.py&A"FF%#)N&'3&+93#&A%#&./3&@&+"&F%P3&lG&X*"7&;2"+/&"I&
+9,/&)%+%N&>3+`/&/+%(+&<,+9&+93&;%AP%H3/&<3&#33)&+"&2"%)N&JI&+93/3&%(3&#"+&,#/+%223)&$".&A%#&,#/+%22&
+93F&I,(/+&<,+9&install.packages()N&
library(ape)&
library(vegan)&
library(ggplot2)&
Load+Data+
@#1R--9,)).90#9B%E)92#1"9<)0%9.6E)"(6029%,.9&)0%.%0%9D6-)(9-#%.).9%,.9(1<()00).90#90B)9
4#"")409,1&<)"9%,.9#".)"9#D9(%&N-)(O&
Principal+Coordinates+Analysis+
Q"<&<3&A%#&./3&+93&I.#A+,"#&pcoa()&I("F&+93&@&;%AP%H3&ape&+"&%A+.%22$&A%2A.2%+3&".(&;(,#A,;%2&
A""(),#%+3&?3A+"(/N&!"&F%P3&;2"++,#H&3%/,3(5&<3&/%?3&+93&?3A+"(/&%/&%&)%+%&I(%F35&/3+&.;&#3<&
A"2.F#&+,+23/5&%#)&%))&%&A"2.F#&"I&/%F;23&JG/N&
# Run the pcoa() function on the beta diversity table,
# and store the vectors generated as a dataframe
PCOA <- data.frame(pcoa(beta)$vectors)

# If you look at the PCOA table, you'll see the column names
# are the 'axes' and the row names are sample IDs. We want them to
# be labeled "PC" instead of "axis"

# paste0() sticks "PC" in front of each column number in one go
# (PC1, PC2, PC3...), so we do not need a loop
new_names <- paste0("PC", seq_len(ncol(PCOA)))

# Replace the column names of PCOA
names(PCOA) <- new_names

# Create a column that is SampleIDs for PCOA
PCOA$SampleID <- rownames(PCOA)

# Create a column that is SampleIDs for the metadata
metadata$SampleID <- rownames(metadata)

# Merge the metadata and beta diversity
PCOA <- merge(PCOA, metadata, by = "SampleID")
Plotting+the+PCoA+
Q"<&$".&9%?3&%&)%+%&I(%F3&+9%+&9%/&%22&"I&$".(&X*\7&?3A+"(/&%#)&%22&+93&(323?%#+&F3+%)%+%5&
F%+A93)&.;&-$&/%F;23&JGN&J#&+9,/&3D%F;23&<3&<,22&;2"+&+93&I,(/+&+<"&;(,#A,;%2&A""(),#%+3/&KX*d&
%#)&X*lLN&JI&$".&(3F3F-3(5&+93&I,(/+&;(,#A,;%2&A""(),#%+3/&/9".2)&3D;2%,#&+93&F%U"(,+$&"I&+93&
?%(,%+,"#&,#&+93&)%+%N&!93/3&<,22&-3&;(3++$&/,F;23&/A%++3(&;2"+/N&
# The x and y set in ggplot() are shared by every layer.
# geom_point() makes it a scatter plot where the points are colored
# according to BodySite.
ggplot(PCOA, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = BodySite)) +
  labs(title = "PCoA Plot")

# Now we add ellipses around the clusters. This makes it look great, but
# it can also be misleading and make us think there are groups when
# there aren't. The points are colored by BodySite, while the ellipses
# are filled by body AREA (Description).
ggplot(PCOA, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = BodySite)) +
  labs(title = "PCoA and Clusters") +
  stat_ellipse(aes(fill = Description),
               alpha = 0.3, geom = "polygon", linetype = "blank")
&
Q"+,A3&+9%+&+93&A"2"(&"I&+93&322,;/3/&)"#`+&(3%22$&F%+A9&+93&A"2"(&"I&+93&;",#+/&+93$&%(3&
A2./+3(,#HN&!93&A"2"(/&%(3&)3+3(F,#3)&-$&<9,A9&"()3(&+93&-")$&%(3%&,/&I%A+"(3)&-$N&'3&A%#&
F%P3&+9,/&"()3(&2,#3&.;&<,+9&+93&"()3(&"I&+93&-")$&/,+3/N&
# Check order of levels of body area (Description)&
levels(PCOA$Description)&
## [1] "Gastrointestinal_tract" "Oral" &
## [3] "Skin" "Urogenital_tract"&
# Check order of levels in BodySite&
levels(PCOA$BodySite)&
## [1] "Left_Retroauricular_crease" "Mid_vagina" &
## [3] "Saliva" "Stool" &
## [5] "Subgingival_plaque"&
# Put the body sites in the same order as the body areas they belong to:
# gut (Stool), oral (Saliva, Subgingival_plaque), skin
# (Left_Retroauricular_crease), urogenital (Mid_vagina)
site_order <- c(
  "Stool", "Saliva", "Subgingival_plaque",
  "Left_Retroauricular_crease", "Mid_vagina"
)
PCOA$BodySite <- factor(PCOA$BodySite, levels = site_order)

# Draw the plot again with the new order
ggplot(PCOA, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = BodySite)) +
  labs(title = "PCoa and Clusters") +
  stat_ellipse(aes(fill = Description),
               alpha = 0.3, geom = "polygon", linetype = "blank")
&
Changing+Plotting+Parameters+
!93&I"22"<,#H&2"#H&A"FF%#)&+9("</&%&<9"23&;,23&"I&A./+"F,M%+,"#&-322/&%#)&<9,/+23/&%+&HH;2"+&8&
+93&I,22&A"2"(/&%(3&+<3%P3)5&+93&;",#+/&%(3&%&-,+&-,HH3(5&+93&I"#+&/,M3/&%(3&-,HH3(N&!9,/&,/&U./+&+"&
H,?3&$".&%&+%/+3&"I&%22&+93&),II3(3#+&%3/+93+,A&";+,"#/&$".&A%#&;2%$&%(".#)&<,+9N&Y".&/9".2)&+($&
F"),I$,#H&3%A9&;%(%F3+3(&%#)&/33&<9%+&,+&)"3/&+"&+93&;2"+N&
ggplot(PCOA) +
  # Ellipses first, so the points are drawn on top of them
  stat_ellipse(alpha = 0.3, geom = "polygon", linetype = "blank",
               aes(x = PC1, y = PC2, fill = Description)) +
  geom_point(size = 3, aes(x = PC1, y = PC2, color = BodySite)) +
  labs(title = "Human Microbiome Betadiversity") +
  # Naming each label by the value it replaces keeps the legend correct
  # whatever order the BodySite levels are in
  scale_color_discrete(name = "Body Site",
                       labels = c(
                         Left_Retroauricular_crease = "Ear Fold",
                         Mid_vagina = "Vagina",
                         Saliva = "Saliva",
                         Stool = "Stool",
                         Subgingival_plaque = "Plaque"
                       )) +
  scale_fill_hue(h.start = 20,
                 name = "Body Area",
                 labels = c(
                   Gastrointestinal_tract = "GI Tract",
                   Oral = "Oral",
                   Skin = "Skin",
                   Urogenital_tract = "Urogenital Tract"
                 )) +
  # theme_bw() is a complete theme: it has to come BEFORE theme(),
  # otherwise it replaces every setting made in theme()
  theme_bw() +
  theme(panel.background = element_rect(color = "grey97"),
        plot.title = element_text(size = 16),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 12)) +
  # Tidy the legend keys: bigger points on a plain background for the
  # body sites, and no point drawn inside the body area keys
  guides(color = guide_legend(override.aes = list(fill = "grey97",
                                                  size = 4)),
         fill = guide_legend(override.aes = list(shape = NA)))
&
Testing+for+Signifcant+Differences+
:./+&2,P3&<3&),)&I"(&%2;9%&),?3(/,+$5&<3&A%#&+3/+&I"(&/,H#,I,A%#+&),II3(3#A3/&,#&-3+%&),?3(/,+$N&S"(&
3D%F;235&23+`/&/%$&<3&<%#+&+"&+3/+&I"(&/,H#,I,A%#+&),II3(3#A3/&-3+<33#&-")$&/,+3/&<,+9&+93&
[#<3,H9+3)&[#,S(%A&)%+%N&'3&%2(3%)$&2"%)3)&+9%+&)%+%&<93#&(%#&,+&+9(".H9&+93&pcoa()&
I.#A+,"#&%-"?3N&O.+&/,#A3&<3&%(3&23%(#,#H&23+`/&2"%)&,+&%H%,#&/"&<3&A%#&H3+&A"FI"(+%-23&<,+9&+93&
A")3N&
%.#,6(9
%.#,6(&,/&%&#"#8;%(%F3+(,A&/+%+,/+,A%2&+3/+5&<9,A9&F3%#/&,+&./3/&;3(F.+%+,"#/&"I&+93&)%+%&+"&
)3+3(F,#3&+93&;8?%2.35&"(&/+%+,/+,A%2&/,H#,I,A%#A3N&
J+&(3Z.,(3/0&
u&%&),/+%#A3&F%+(,D&I,235&/.A9&%/&%&[#,S(%A&),/+%#A3&F%+(,D&
u&%&F%;;,#H&I,235&%#)&%&A%+3H"($&,#&+93&F%;;,#H&I,23&+"&)3+3(F,#3&/%F;23&H(".;,#H&I("F&
J+&A"F;.+3/&%#&?X&?%2.3&K3II3A+&/,M3L&<9,A9&/9"</&+93&;3(A3#+%H3&"I&?%(,%+,"#&3D;2%,#3)&-$&+93&
/.;;2,3)&F%;;,#H&I,23&A%+3H"($5&%/&<322&%/&%&NSE%-1)&+"&)3+3(F,#3&+93&/+%+,/+,A%2&/,H#,I,A%#A3N&
4"(3&,#I"(F%+,"#&"I&+93&%)"#,/&+3/+&A%#&-3&I".#)&93(30&
9++;0RRZ,,F3N"(HR+.+"(,%2/RA%+3H"($gA"F;%(,/"#N9+F25&
9++;0RRAAN".2.NI,RsU%(,"P/%R/"I+932;R?3H%#R9+F2R%)"#,/N9+F2&
# as.dist() turns the beta diversity table into a distance
# (resemblance) object
beta_dist <- as.dist(beta)

# Run the ADONIS test for a difference across all body sites, using
# 999 permutations.
# NOTE(review): recent vegan releases replace adonis() with adonis2(),
# whose result is laid out differently - confirm against the installed
# version.
ad <- adonis(
  beta_dist ~ metadata[, "BodySite"],
  data = metadata,
  permutations = 999
)
ad
## &
## Call:&
## adonis(formula = beta_dist ~ metadata[, "BodySite"], data = metadata,
permutations = 999) &
## &
## Permutation: free&
## Number of permutations: 999&
## &
## Terms added sequentially (first to last)&
## &
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) &
## metadata[, "BodySite"] 4 12.725 3.1811 15.789 0.39433 0.001 ***&
## Residuals 97 19.544 0.2015 0.60567 &
## Total 101 32.269 1.00000 &
## ---&
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1&
Note:
The p-value (Pr(>F) = 0.001) indicates that at an alpha of 0.05, the grouping of samples by 'BodySite' is statistically
significant.
The R² value indicates that approximately 39% of the variation in distances is explained by this
grouping. This is important because a p-value can indicate significance, but we must also notice
how much of the variation the input variables contribute.
Now let's write our output to a file.
# Pull just the analysis of variance table (aov.tab) out of the
# adonis result
a.table <- ad$aov.tab

# Save it as a tab-separated text file. col.names = NA writes an empty
# header cell above the row names so the columns line up.
write.table(
  a.table,
  file = "analysis.txt",
  quote = FALSE,
  sep = "\t",
  col.names = NA
)

Navigation menu