使用Guppy
Early downstream analysis components such as **barcoding/demultiplexing, adapter trimming and alignment** are contained within Guppy. Furthermore, Guppy now performs modified basecalling (5mC, 6mA and CpG) from the raw signal data, producing an additional FAST5 file of modified base probabilities.
#!/bin/bash
#SBATCH --time=1:00:00
#SBATCH --mem=2g
#
# Submit one GPU Guppy basecalling job per run directory.
#
# For every sub-directory of ${fast5_dir} this script writes a Slurm job file
# to ${job_dir}/<run>.job and submits it with sbatch. Each generated job:
#   1. loads guppy/3.2.2 and runs guppy_basecaller on ${fast5_dir}/<run>,
#      saving into ${guppy_dir}/<run>;
#   2. concatenates the *.fastq files found there into ${guppy_dir}/<run>.fastq;
#   3. moves those *.fastq files into ${guppy_dir}/<run>/fastq_pass.
# Job stdout goes to ${log_dir}/<run>.guppy.out.
#
# Exit status: 0 if every job was submitted, 1 if the input directory is
# missing or any sbatch call failed.
set -euo pipefail

dir=/data/Nanopore
fast5_dir=/data_dir/Nanopore/Fast5
guppy_dir="${dir}/Fastq"
log_dir="${dir}/log/guppy"
job_dir="${dir}/job/guppy"
thread=14

mkdir -p -- "${log_dir}" "${job_dir}" "${guppy_dir}"

# Stop here if the input directory is missing; otherwise the glob below would
# enumerate whatever directory the script happened to be started from.
cd -- "${fast5_dir}" || {
  printf 'error: cannot cd to %s\n' "${fast5_dir}" >&2
  exit 1
}

# With no run directories the pattern must expand to nothing, not to '*/'.
shopt -s nullglob

status=0
# Glob directories directly instead of parsing ls output; the trailing slash
# restricts the match to directories (including symlinks to directories).
for run_path in */; do
  run="${run_path%/}"
  job_file="${job_dir}/${run}.job"

  # Unquoted delimiter: ${run}, ${thread}, ${fast5_dir}, ... are expanded now,
  # while the escaped variables (backslash-dollar) stay literal and are
  # expanded when the job itself runs. A doubled backslash at end of line
  # emits a single backslash, so the generated job keeps its line
  # continuations.
  cat > "${job_file}" <<JOB_EOF
#!/bin/bash
#SBATCH --job-name=${run}.guppy.job
#SBATCH --output=${log_dir}/${run}.guppy.out
#SBATCH --time=24:00:00
#SBATCH --partition=gpu
#SBATCH --cpus-per-task=${thread}
#SBATCH --mem=16g
#SBATCH --gres=lscratch:200,gpu:v100:1
module load guppy/3.2.2
data_dir="${fast5_dir}/${run}"
out_dir="${guppy_dir}/${run}"
mkdir -p "\${out_dir}/fastq_pass"
# Do not merge or move anything if basecalling failed.
guppy_basecaller --input_path "\${data_dir}" \\
  --flowcell FLO-MIN106 --kit SQK-RNA002 \\
  -x cuda:all \\
  --records_per_fastq 0 \\
  --save_path "\${out_dir}" || exit 1
# Without this check a failed cd would make the globs below run elsewhere.
cd "\${out_dir}" || exit 1
cat ./*.fastq > "${guppy_dir}/${run}.fastq"
mv ./*.fastq "\${out_dir}/fastq_pass"
JOB_EOF

  # Keep submitting the remaining runs even if one submission fails.
  if ! sbatch "${job_file}"; then
    printf 'warning: sbatch failed for %s\n' "${job_file}" >&2
    status=1
  fi
done

exit "${status}"