<?xml version="1.0" encoding="UTF-8"?>
<EXPERIMENT_SET xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <EXPERIMENT accession="SRX11528791" alias="DF15_WT_ONT_RNAseq">
    <IDENTIFIERS>
      <PRIMARY_ID>SRX11528791</PRIMARY_ID>
      <SUBMITTER_ID namespace="SUB10063423">DF15_WT_ONT_RNAseq</SUBMITTER_ID>
    </IDENTIFIERS>
    <TITLE>ONT RNAseq of DF15: multiple myeloma cell-line</TITLE>
    <STUDY_REF accession="SRP330017">
      <IDENTIFIERS>
        <PRIMARY_ID>SRP330017</PRIMARY_ID>
        <EXTERNAL_ID namespace="BioProject">PRJNA749325</EXTERNAL_ID>
      </IDENTIFIERS>
    </STUDY_REF>
    <DESIGN>
      <DESIGN_DESCRIPTION>We performed basecalling on the raw fast5 data using Guppy (version not specified) from Oxford Nanopore Technologies (guppy_basecaller compress-fastq -c dna_r9.4.1_450bps_hac.cfg -x cuda:1) in GPU mode, running on a GTX 1080 Ti graphics card. For each read, we identified the barcode and UMI sequence by searching for the polyA region and the flanking regions before and after the barcode/UMI. Accurately sequenced barcodes were identified based on their dual nucleotide complementarity. These unambiguous barcodes were then used as a guide to error-correct the ambiguous barcodes in a second-pass correction step. We performed fuzzy searching using a Levenshtein distance of 4 (unless otherwise stated in the figure legend) and replaced the original ambiguous barcode with the unambiguous sequence. A whitelist of barcodes was then generated using UMI-tools whitelist (umi_tools whitelist --bc-pattern=CCCCCCCCCCCCCCCCCCCCCCCCNNNNNNNNNNNNNNNN --set-cell-number=1000) [3]. This whitelist was used to assess the quality of our cell-to-read-count ratio and as an input for UMI-tools extract. Next, the barcode and UMI sequence of each read were extracted and placed within the read2 header file using UMI-tools extract (umi_tools extract --bc-pattern=CCCCCCCCCCCCCCCCCCCCCCCCNNNNNNNNNNNNNNNN --whitelist=whitelist.txt). Reads were then aligned to the transcriptome using minimap2 [10] (-ax splice -uf --MD --sam-hit-only --junc-bed) with the reference transcriptomes for human hg38 and mouse mm10. The resulting SAM file was converted to a BAM file, which was then sorted and indexed using samtools [11]. The transcript name was then added as an XT tag within the BAM file using pysam.</DESIGN_DESCRIPTION>
      <SAMPLE_DESCRIPTOR accession="SRS9566055">
        <IDENTIFIERS>
          <PRIMARY_ID>SRS9566055</PRIMARY_ID>
          <SUBMITTER_ID namespace="pda|nansaripour@orcid">DF15</SUBMITTER_ID>
        </IDENTIFIERS>
      </SAMPLE_DESCRIPTOR>
      <LIBRARY_DESCRIPTOR>
        <LIBRARY_NAME>DF15_WT_ONT_RNAseq</LIBRARY_NAME>
        <LIBRARY_STRATEGY>RNA-Seq</LIBRARY_STRATEGY>
        <LIBRARY_SOURCE>TRANSCRIPTOMIC SINGLE CELL</LIBRARY_SOURCE>
        <LIBRARY_SELECTION>cDNA</LIBRARY_SELECTION>
        <LIBRARY_LAYOUT>
          <SINGLE/>
        </LIBRARY_LAYOUT>
      </LIBRARY_DESCRIPTOR>
    </DESIGN>
    <PLATFORM>
      <OXFORD_NANOPORE>
        <INSTRUMENT_MODEL>PromethION</INSTRUMENT_MODEL>
      </OXFORD_NANOPORE>
    </PLATFORM>
  </EXPERIMENT>
</EXPERIMENT_SET>
