#运行Ubuntu之前,先在电脑D盘创建目录win16s,并将样本测序数据文件(raw_data.zip),mapping.txt与qiime_params.txt放在此目录下。 #此流程所有命令都需在WSL-Ubuntu的bash终端下运行(以下命令前面的“$”代表bash提示符,不需要输入)。 #Navigate to your working directory $cd /mnt/d/win16s/ #准备raw reads ~This pipeline assumes that you have an input folder of paired-end reads files, with the default "_R1" and "_R2" containing the forward and reverse reads, respectively. $unzip raw_data.zip $ls raw_data #显示文件列表,如SAA_R1.fastq.gz, SAA_R2.fastq.gz ...,如果文件名不对,需要重新命名 #1.验证mapping file ~ 按样本名称修改mapping.txt,可忽略Barcode与Primer序列为空的错误 validate_mapping_file.py -m mapping.txt -o validate_map #2.合并双端序列 $mdir join_pe_reads #$vsearch --fastq_mergepairs raw_data/SAA_R1.fastq.gz --reverse raw_data/SAA_R2.fastq.gz --fastqout join_pe_reads/SAA.fq #多个样品数据 $for i in `tail -n+2 mapping.txt |cut -f 1`; do vsearch --fastq_mergepairs raw_data/${i}_R1.fastq.gz --reverse raw_data/${i}_R2.fastq.gz --fastqout join_pe_reads/${i}.fq ; done #3.序列质量控制 ~ Using FastQC to check read length firstly, reads with length < 380 are filtered $mkdir qual_filter #$vsearch --fastx_filter join_pe_reads/SAA.fq --fastq_maxee 1.0 --fastq_minlen 380 --fastqout join_pe_reads/SAA_filter.fq #多样品数据,直接copy命令到terminal执行 for i in `tail -n+2 mapping.txt |cut -f 1`; do vsearch --fastx_filter join_pe_reads/${i}.fq --fastq_maxee 1.0 --fastq_minlen 380 --fastqout qual_filter/${i}.fq ; done #4.split libraries $mkdir split_out $multiple_split_libraries_fastq.py -i qual_filter/ -o split_out/ --demultiplexing_method sampleid_by_file #5.Dereplication(序列去重复) vsearch --derep_fulllength split_out/seqs.fna --output split_out/seqs_derep.fa --sizeout --minuniquesize 2 #6.嵌合体检测 #$vsearch --uchime_ref split_out/seqs_derep.fa --db rdp_gold.fa --nonchimeras seqs_nochimeras.fa #需要参考序列文件 $vsearch --uchime_denovo split_out/seqs_derep.fa --nonchimeras seqs_nochimeras.fa #7.OTU clustering(聚类) ~the longest sequence is picked as the cluster representative. $vsearch --cluster_fast seqs_nochimeras.fa --id 0.97 --centroids otus.fa --relabel OTU_ #8.Assign taxonomy to OTUs using the RDP classifier on QIIME(分类注释) $assign_taxonomy.py -i otus.fa -m rdp -o taxonomy #OTU taxonomy was then determined using the Ribosomal Database Project classifier retrained toward the Greengenes database of 13_8 version #9.Map reads back to the OTU data $vsearch --usearch_global split_out/seqs.fna --db otus.fa --strand plus --id 0.97 --otutabout otus_table.txt #10.Convert otu_table.txt to otu-table.biom $biom convert -i otus_table.txt -o otus_table.biom --to-hdf5 --table-type "OTU table" $biom add-metadata -i otus_table.biom -o otus_table_tax.biom --observation-metadata-fp taxonomy/otus_tax_assignments.txt --sc-separated taxonomy --observation-header OTUID,taxonomy #11.Check OTU Table on QIIME $biom summarize-table -i otus_table_tax.biom -o summary_biom.txt #记录数据量最少的样本的reads数(Counts),用于后续的统计抽样(步骤16中的-e参数) $biom convert -i otus_table_tax.biom -o otus_table_tsv.txt --table-type "OTU table" --to-tsv --header-key taxonomy #open otu_table_tsv.txt with Excel to modify any error, then convert back to biom format $biom convert -i otus_table_tsv.txt -o otus_table_tsv.biom --table-type "OTU table" --to-hdf5 --table-type="OTU table" --process-obs-metadata taxonomy #12.可视化 ~要先运行Xming #Plot relative abundance for all samples $summarize_taxa_through_plots.py -i otus_table_tax.biom -m mapping.txt -o taxa_summary #Create a summary for at species level $summarize_taxa.py -i otus_table_tax.biom -o taxonomy_summary_species -L 7 $plot_taxa_summary.py -i taxonomy_summary_species/otus_table_tax_L7.txt -o taxonomy_summary_species #13.Align sequences on QIIME using the ClustalW or muscle method $align_seqs.py -i otus.fa -m clustalw -o rep_set_align #14.Filter alignments on QIIME $filter_alignment.py -i rep_set_align/otus_aligned.fasta -o rep_set_align #15.Make the reference tree on QIIME $make_phylogeny.py -i rep_set_align/otus_aligned_pfiltered.fasta -o rep_set.tre #16.Run diversity analyses on QIIME $core_diversity_analyses.py -i otus_table.biom -m mapping.txt -t rep_set.tre -e 10000 -o core_diversity -p qiime_params.txt #The parameter “-e 10000” is the sequencing depth to use for even sub-sampling and maximum rarefaction depth. You should review the output of the 'biom summarize-table' command (summary_biom.txt)to decide on this value. #17.Making PCoA plots $make_2d_plots.py -i core_diversity/bdiv_even10000/weighted_unifrac_pc.txt -o pcoa_plot -m mapping.txt -b Group #-i参数后输入文件所在目录的数字(10000)会随前面命令的-e参数变化 #18.统计两组间的差异: $compare_categories.py --method anosim -i core_diversity/bdiv_even10000/weighted_unifrac_dm.txt -m mapping.txt -c Group -o anosim_out -n 99